Source code for sliderule.gedi

# Copyright (c) 2021, University of Washington
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
#    this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
#    this list of conditions and the following disclaimer in the documentation
#    and/or other materials provided with the distribution.
#
# 3. Neither the name of the University of Washington nor the names of its
#    contributors may be used to endorse or promote products derived from this
#    software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS
# “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
import logging
import numpy
import geopandas
import sliderule
from sliderule import earthdata, logger

###############################################################################
# GLOBALS
###############################################################################

# profiling times for each major function
profiles = {}

# gedi parameters
ALL_BEAMS = -1

###############################################################################
# LOCAL FUNCTIONS
###############################################################################

#
# Flatten Batches
#
def __flattenbatches(rsps, rectype, batch_column, parm, keep_id, as_numpy_array, height_key):

    # Latch Start Time
    tstart_flatten = time.perf_counter()

    # Check for Output Options
    if "output" in parm:
        gdf = sliderule.procoutputfile(parm, rsps)
        profiles["flatten"] = time.perf_counter() - tstart_flatten
        return gdf

    # Flatten Records
    columns = {}
    records = []
    num_records = 0
    field_dictionary = {} # [<field_name>] = {"shot_number": [], <field_name>: []}
    file_dictionary = {} # [id] = "filename"
    if len(rsps) > 0:
        # Sort Records
        for rsp in rsps:
            if rectype in rsp['__rectype']:
                records += rsp,
                num_records += len(rsp[batch_column])
            elif 'rsrec' == rsp['__rectype'] or 'zsrec' == rsp['__rectype']:
                if rsp["num_samples"] <= 0:
                    continue
                # Get field names and set
                sample = rsp["samples"][0]
                field_names = list(sample.keys())
                field_names.remove("__rectype")
                field_set = rsp['key']
                if rsp["num_samples"] > 1:
                    as_numpy_array = True
                # On first time, build empty dictionary for field set associated with raster
                if field_set not in field_dictionary:
                    field_dictionary[field_set] = {'shot_number': []}
                    for field in field_names:
                        field_dictionary[field_set][field_set + "." + field] = []
                # Populate dictionary for field set
                field_dictionary[field_set]['shot_number'] += numpy.uint64(rsp['index']),
                for field in field_names:
                    if as_numpy_array:
                        data = []
                        for s in rsp["samples"]:
                            data += s[field],
                        field_dictionary[field_set][field_set + "." + field] += numpy.array(data),
                    else:
                        field_dictionary[field_set][field_set + "." + field] += sample[field],
            elif 'fileidrec' == rsp['__rectype']:
                file_dictionary[rsp["file_id"]] = rsp["file_name"]

        # Build Columns
        if num_records > 0:
            # Initialize Columns
            sample_record = records[0][batch_column][0]
            for field in sample_record.keys():
                fielddef = sliderule.get_definition(sample_record['__rectype'], field)
                if len(fielddef) > 0:
                    if type(sample_record[field]) == tuple:
                        columns[field] = numpy.empty(num_records, dtype=object)
                    else:
                        columns[field] = numpy.empty(num_records, fielddef["nptype"])
            # Populate Columns
            cnt = 0
            for record in records:
                for batch in record[batch_column]:
                    for field in columns:
                        columns[field][cnt] = batch[field]
                    cnt += 1
    else:
        logger.debug("No response returned")

    # Build Initial GeoDataFrame
    gdf = sliderule.todataframe(columns, height_key=height_key)

    # Merge Ancillary Fields
    tstart_merge = time.perf_counter()
    for field_set in field_dictionary:
        df = geopandas.pd.DataFrame(field_dictionary[field_set])
        gdf = geopandas.pd.merge(gdf, df, how='left', on='shot_number').set_axis(gdf.index)
    profiles["merge"] = time.perf_counter() - tstart_merge

    # Delete Shot Number Column
    if len(gdf) > 0 and not keep_id:
        del gdf["shot_number"]

    # Attach Metadata
    if len(file_dictionary) > 0:
        gdf.attrs['file_directory'] = file_dictionary

    # Return GeoDataFrame
    profiles["flatten"] = time.perf_counter() - tstart_flatten
    return gdf

#
#  Perform Processing Request
#
def __processing_request(parm, asset, callbacks, resources, keep_id, as_numpy_array, api, rec, height_key, profile):
    try:
        tstart = time.perf_counter()

        # Default the Asset
        rqst_parm = parm.copy()
        if "asset" not in rqst_parm:
            rqst_parm["asset"] = asset

        # Get List of Resources from CMR (if not supplied)
        resources = earthdata.search(rqst_parm, resources)

        # Build GEDI Request
        rqst = {
            "resources": resources,
            "parms": rqst_parm
        }

        # Make API Processing Request
        rsps = sliderule.source(api, rqst, stream=True, callbacks=callbacks)

        # Flatten Responses
        gdf = __flattenbatches(rsps, rec, 'footprint', parm, keep_id, as_numpy_array, height_key)

        # Return Response
        profiles[profile] = time.perf_counter() - tstart
        return gdf

    # Handle Runtime Errors
    except RuntimeError as e:
        logger.critical(e)
        return sliderule.emptyframe()

###############################################################################
# APIs
###############################################################################

#
#  Initialize
#
[docs] def init (url=sliderule.service_url, verbose=False, loglevel=logging.CRITICAL, organization=sliderule.service_org, desired_nodes=None, time_to_live=60, bypass_dns=False): ''' Initializes the Python client for use with SlideRule and should be called before other GEDI API calls. This function is a wrapper for the `sliderule.init(...) function </web/rtds/api_reference/sliderule.html#init>`_. Examples -------- >>> from sliderule import gedi >>> gedi.init() ''' sliderule.init(url, verbose, loglevel, organization, desired_nodes, time_to_live, bypass_dns, plugins=['gedi'])
# # GEDI L4A #
[docs] def gedi04a (parm, resource): ''' Performs GEDI L4A subsetting of elevation footprints Parameters ---------- parms: dict parameters used to configure subsetting process resource: str GEDI HDF5 filename asset: str data source asset Returns ------- GeoDataFrame gridded footrpints ''' return gedi04ap(parm, resources=[resource])
# # Parallel GEDI04A #
[docs] def gedi04ap(parm, callbacks={}, resources=None, keep_id=False, as_numpy_array=False, height_key=None): ''' Performs subsetting in parallel on GEDI data and returns elevation footprints. This function expects that the **parm** argument includes a polygon which is used to fetch all available resources from the CMR system automatically. If **resources** is specified then any polygon or resource filtering options supplied in **parm** are ignored. Parameters ---------- parms: dict parameters used to configure subsetting process asset: str data source asset callbacks: dictionary a callback function that is called for each result record resources: list a list of granules to process (e.g. ["GEDI04_A_2019229131935_O03846_02_T03642_02_002_02_V002.h5", ...]) keep_id: bool whether to retain the "extent_id" column in the GeoDataFrame for future merges as_numpy_array: bool whether to provide all sampled values as numpy arrays even if there is only a single value height_key: str identifies the name of the column provided for the 3D CRS transformation Returns ------- GeoDataFrame geolocated footprints Examples -------- >>> from sliderule import gedi >>> gedi.init() >>> region = [ {"lon":-105.82971551223244, "lat": 39.81983728534918}, ... {"lon":-105.30742121965137, "lat": 39.81983728534918}, ... {"lon":-105.30742121965137, "lat": 40.164048017973755}, ... {"lon":-105.82971551223244, "lat": 40.164048017973755}, ... {"lon":-105.82971551223244, "lat": 39.81983728534918} ] >>> parms = { "poly": region } >>> resources = ["GEDI04_A_2019229131935_O03846_02_T03642_02_002_02_V002.h5"] >>> asset = "ornldaac-s3" >>> rsps = gedi.gedi04ap(parms, asset=asset, resources=resources) ''' return __processing_request(parm, "gedil4a", callbacks, resources, keep_id, as_numpy_array, 'gedi04ap', 'gedi04arec', height_key, gedi04ap.__name__)
# # GEDI L2A #
[docs] def gedi02a (parm, resource): ''' Performs GEDI L2A subsetting of elevation footprints Parameters ---------- parms: dict parameters used to configure subsetting process resource: str GEDI HDF5 filename asset: str data source asset Returns ------- GeoDataFrame gridded footrpints ''' return gedi02ap(parm, resources=[resource])
# # Parallel GEDI02A #
[docs] def gedi02ap(parm, callbacks={}, resources=None, keep_id=False, as_numpy_array=False, height_key=None): ''' Performs subsetting in parallel on GEDI data and returns geolocated footprints. This function expects that the **parm** argument includes a polygon which is used to fetch all available resources from the CMR system automatically. If **resources** is specified then any polygon or resource filtering options supplied in **parm** are ignored. Parameters ---------- parms: dict parameters used to configure subsetting process asset: str data source asset callbacks: dictionary a callback function that is called for each result record resources: list a list of granules to process (e.g. ["GEDI04_A_2019229131935_O03846_02_T03642_02_002_02_V002.h5", ...]) keep_id: bool whether to retain the "extent_id" column in the GeoDataFrame for future merges as_numpy_array: bool whether to provide all sampled values as numpy arrays even if there is only a single value height_key: str identifies the name of the column provided for the 3D CRS transformation Returns ------- GeoDataFrame geolocated footprints Examples -------- >>> from sliderule import gedi >>> gedi.init() >>> region = [ {"lon":-105.82971551223244, "lat": 39.81983728534918}, ... {"lon":-105.30742121965137, "lat": 39.81983728534918}, ... {"lon":-105.30742121965137, "lat": 40.164048017973755}, ... {"lon":-105.82971551223244, "lat": 40.164048017973755}, ... {"lon":-105.82971551223244, "lat": 39.81983728534918} ] >>> parms = { "poly": region } >>> resources = ["GEDI02_A_2019229131935_O03846_02_T03642_02_002_02_V002.h5"] >>> asset = "gedi-local" >>> rsps = gedi.gedi02ap(parms, asset=asset, resources=resources) ''' return __processing_request(parm, "gedil2a", callbacks, resources, keep_id, as_numpy_array, 'gedi02ap', 'gedi02arec', height_key, gedi02ap.__name__)
# # GEDI L1B #
[docs] def gedi01b (parm, resource): ''' Performs GEDI L1B subsetting of elevation waveforms Parameters ---------- parms: dict parameters used to configure subsetting process resource: str GEDI HDF5 filename asset: str data source asset Returns ------- GeoDataFrame gridded footrpints ''' return gedi01bp(parm, resources=[resource])
# # Parallel GEDI01B #
[docs] def gedi01bp(parm, callbacks={}, resources=None, keep_id=False, as_numpy_array=False, height_key=None): ''' Performs subsetting in parallel on GEDI data and returns geolocated footprints. This function expects that the **parm** argument includes a polygon which is used to fetch all available resources from the CMR system automatically. If **resources** is specified then any polygon or resource filtering options supplied in **parm** are ignored. Parameters ---------- parms: dict parameters used to configure subsetting process asset: str data source asset callbacks: dictionary a callback function that is called for each result record resources: list a list of granules to process (e.g. ["GEDI04_A_2019229131935_O03846_02_T03642_02_002_02_V002.h5", ...]) keep_id: bool whether to retain the "extent_id" column in the GeoDataFrame for future merges as_numpy_array: bool whether to provide all sampled values as numpy arrays even if there is only a single value height_key: str identifies the name of the column provided for the 3D CRS transformation Returns ------- GeoDataFrame geolocated footprints Examples -------- >>> from sliderule import gedi >>> gedi.init() >>> region = [ {"lon":-105.82971551223244, "lat": 39.81983728534918}, ... {"lon":-105.30742121965137, "lat": 39.81983728534918}, ... {"lon":-105.30742121965137, "lat": 40.164048017973755}, ... {"lon":-105.82971551223244, "lat": 40.164048017973755}, ... {"lon":-105.82971551223244, "lat": 39.81983728534918} ] >>> parms = { "poly": region } >>> resources = ["GEDI01_B_2019229131935_O03846_02_T03642_02_002_02_V002.h5"] >>> asset = "gedi-local" >>> rsps = gedi.gedi01bp(parms, asset=asset, resources=resources) ''' return __processing_request(parm, "gedil1b", callbacks, resources, keep_id, as_numpy_array, 'gedi01bp', 'gedi01brec', height_key, gedi01bp.__name__)