SlideRule CMR Debug¶

A simple tool for checking CMR queries

  • Creates a leaflet map for drawing regions of interest
  • Queries CMR for granules and granule polygons
  • Plots granule polygons on map
  • Retrieves and plots granule tracks on map

Jupyter and SlideRule¶

Jupyter widgets are used to set parameters for the SlideRule API.

Regions of interest for submitting to SlideRule are drawn on an ipyleaflet map.

Load necessary packages¶

In [1]:
import re
import time
import logging
import posixpath
import numpy as np
import geopandas as gpd
import concurrent.futures
import matplotlib.cm as cm
import matplotlib.colors as colors
import ipywidgets as widgets
from shapely.geometry import LineString
from sliderule import sliderule, icesat2, ipysliderule, earthdata, h5
import sliderule.io
# autoreload
%load_ext autoreload
%autoreload 2
/home/jswinski/miniconda3/envs/sliderule_env/lib/python3.12/site-packages/h5py/__init__.py:36: UserWarning: h5py is running against HDF5 1.14.3 when it was built against 1.14.2, this may cause problems
  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "

Set ICESat-2 Product¶

  • ATL03: Global Geolocated Photon Data
  • ATL06: Land Ice Height
  • ATL08: Land and Vegetation Height

Interactive Mapping with Leaflet¶

Interactive maps within the SlideRule python API are built upon ipyleaflet.

There are 3 projections available within SlideRule for mapping (Global, North and South).

In [2]:
# Configure ICESat-2 API endpoint and logging level
icesat2.init("slideruleearth.io", organization="bathy", loglevel=logging.WARNING)
sliderule.get_version()

# widgets for setting ICESat-2 parameters and the
# interactive map projection, stacked vertically
SRwidgets = ipysliderule.widgets()
parameter_widgets = [
    SRwidgets.product,
    SRwidgets.release,
    SRwidgets.start_date,
    SRwidgets.end_date,
    SRwidgets.projection,
]
widgets.VBox(parameter_widgets)
Out[2]:
VBox(children=(Dropdown(description='Product:', options=('ATL03', 'ATL06', 'ATL08'), tooltip='Product: ICESat-…

Select regions of interest for querying CMR¶

In [3]:
# create an ipyleaflet map in the projection selected above
selected_projection = SRwidgets.projection.value
m = ipysliderule.leaflet(selected_projection)
m.map
Out[3]:
Map(center=[39, -108], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…

Build and transmit CMR requests¶

In [4]:
%%time
# query CMR for each region of interest drawn on the map,
# accumulating granule names alongside their footprint polygons
granule_list = []
granule_polygons = []
for poly in m.regions:
    # granule names (resources) and granule metadata for this polygon
    resources,metadata = earthdata.cmr(polygon=poly,
        short_name=SRwidgets.product.value,
        time_start=SRwidgets.time_start,
        time_end=SRwidgets.time_end,
        version=SRwidgets.release.value,
        return_metadata=True)
    # resources and metadata are parallel lists: extend both at once
    granule_list.extend(resources)
    granule_polygons.extend(md.geometry for md in metadata)
# log the number of granules (lazy %-formatting: the message is only
# built when the INFO level is actually enabled)
num_granules = len(granule_list)
logging.info('Number of Granules: %d', num_granules)
logging.debug(granule_list)
CPU times: user 138 ms, sys: 0 ns, total: 138 ms
Wall time: 594 ms

Select Granules to Plot on Map¶

In [5]:
# multiple-selection widget listing every granule returned by CMR
granule_layout = widgets.Layout(width='35%', height='200px')
granule_select = widgets.SelectMultiple(
    description='Granules:',
    options=granule_list,
    layout=granule_layout,
    disabled=False,
)
display(granule_select)
SelectMultiple(description='Granules:', layout=Layout(height='200px', width='35%'), options=('ATL03_2018102010…

Add granule polygons to map¶

In [6]:
# add the footprint of each selected granule to the map,
# each with its own color sampled from the viridis colormap
granule_indices = list(granule_select.index)
polygon_colors = cm.viridis(np.linspace(0, 1, len(granule_indices)))
for g, rgba in zip(granule_indices, polygon_colors):
    hex_color = colors.to_hex(rgba)
    polygon_style = dict(
        color=hex_color,
        fill_color=hex_color,
        opacity=0.8,
        weight=1,
    )
    geojson = ipysliderule.ipyleaflet.GeoJSON(
        data=granule_polygons[g].__geo_interface__,
        style=polygon_style,
    )
    m.map.add_layer(geojson)
        

Get Granules from NSIDC S3¶

In [7]:
def s3_retrieve(granule, **kwargs):
    """Retrieve geolocation variables for an ICESat-2 granule from S3
    and return them as a GeoDataFrame.

    Parameters
    ----------
    granule: str
        name of the ICESat-2 granule (e.g. ``ATL03_..._005_01.h5``)
    asset: str, default 'icesat2'
        sliderule asset the granule is read from
    index_key: str, default 'time'
        column used as the sorted output index
    polygon: GeoDataFrame or None, default None
        if provided, output is clipped to this region via ``gpd.overlay``

    Returns
    -------
    geopandas.GeoDataFrame in EPSG:4326 (empty frame if nothing was read)

    Raises
    ------
    ValueError if the product is not ATL03, ATL06 or ATL08
    """
    # set default keyword arguments
    kwargs.setdefault('asset','icesat2')
    kwargs.setdefault('index_key','time')
    kwargs.setdefault('polygon',None)
    # regular expression operator for extracting information from files
    rx = re.compile(r'(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})(\d{2})'
        r'(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$')
    # extract parameters from ICESat-2 granule
    PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYCL,GRN,RL,VRS,AUX=rx.findall(granule).pop()
    # variables of interest for each supported product
    if (PRD == 'ATL03'):
        segment_group = "geolocation"
        segment_key = 'segment_id'
        lon_key = 'reference_photon_lon'
        lat_key = 'reference_photon_lat'
        vnames = ['segment_id','delta_time','reference_photon_lat',
            'reference_photon_lon']
    elif (PRD == 'ATL06'):
        segment_group = "land_ice_segments"
        segment_key = 'segment_id'
        lon_key = 'longitude'
        lat_key = 'latitude'
        vnames = ['segment_id','delta_time','latitude','longitude']
    elif (PRD == 'ATL08'):
        segment_group = "land_segments"
        segment_key = 'segment_id_beg'
        lon_key = 'longitude'
        lat_key = 'latitude'
        vnames = ['segment_id_beg','segment_id_end','delta_time',
            'latitude','longitude']
    else:
        # previously an unsupported product fell through to a NameError
        # on segment_group below; fail with an explicit message instead
        raise ValueError('Unsupported ICESat-2 product: {0}'.format(PRD))
    # for each valid beam within the HDF5 file
    frames = []
    gt = dict(gt1l=10,gt1r=20,gt2l=30,gt2r=40,gt3l=50,gt3r=60)
    atlas_sdp_epoch = np.datetime64('2018-01-01T00:00:00')
    kwds = dict(startrow=0,numrows=-1)
    for gtx in ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r']:
        geodatasets = [dict(dataset=f'{gtx}/{segment_group}/{v}',**kwds) for v in vnames]
        try:
            # get datasets from s3
            hidatasets = h5.h5p(geodatasets, granule, kwargs['asset'])
            # copy to new "flattened" dictionary
            data = {posixpath.basename(key):var for key,var in hidatasets.items()}
            # Generate Time Column
            delta_time = (data['delta_time']*1e9).astype('timedelta64[ns]')
            data['time'] = gpd.pd.to_datetime(atlas_sdp_epoch + delta_time)
        except Exception:
            # beam missing or unreadable in this granule: skip it
            # (was a bare except, which also swallowed KeyboardInterrupt)
            pass
        else:
            # copy filename parameters
            data['rgt'] = [int(TRK)]*len(data['delta_time'])
            data['cycle'] = [int(CYCL)]*len(data['delta_time'])
            data['gt'] = [gt[gtx]]*len(data['delta_time'])
            # pandas dataframe from compiled dictionary
            frames.append(gpd.pd.DataFrame.from_dict(data))
    # concatenate pandas dataframe
    try:
        df = gpd.pd.concat(frames)
    except ValueError:
        # pd.concat raises ValueError when frames is empty (no beams read)
        return sliderule.emptyframe()
    # convert to a GeoDataFrame
    geometry = gpd.points_from_xy(df[lon_key], df[lat_key])
    gdf = gpd.GeoDataFrame(df.drop(columns=[lon_key,lat_key]),
        geometry=geometry,crs='EPSG:4326')
    # create global track variable
    track = 6*(gdf['rgt']-1) + (gdf['gt']/10)
    gdf = gdf.assign(track=track)
    # calculate global reference point
    global_ref_pt = 6*1387*gdf[segment_key] + track
    gdf = gdf.assign(global_ref_pt=global_ref_pt)
    # sort values for reproducible output despite async processing
    gdf.set_index(kwargs['index_key'], inplace=True)
    gdf.sort_index(inplace=True)
    # remove duplicate points
    gdf = gdf[~gdf.index.duplicated()]
    # intersect with geometry in projected reference system
    if kwargs['polygon'] is not None:
        gdf = gpd.overlay(gdf.to_crs(kwargs['polygon'].crs),
            kwargs['polygon'], how='intersection')
    # convert back to original coordinate reference system
    return gdf.to_crs('EPSG:4326')
In [8]:
%%time
# retrieve the selected granules in parallel,
# one worker per available sliderule server
# (removed: unused perf_start timer and a dead gdf pre-assignment
# that was unconditionally overwritten by the concat below)
results = []
num_servers, _ = sliderule.update_available_servers()
with concurrent.futures.ThreadPoolExecutor(max_workers=num_servers) as executor:
    futures = [executor.submit(s3_retrieve, granule_list[g]) for g in granule_indices]
    # append each dataframe as its granule completes
    for future in concurrent.futures.as_completed(futures):
        results.append(future.result())
# combine the per-granule results into a single dataframe
gdf = gpd.pd.concat(results)
# Display Statistics
print("Reference Ground Tracks: {}".format(gdf["rgt"].unique()))
print("Cycles: {}".format(gdf["cycle"].unique()))
print("Received {} segments".format(gdf.shape[0]))
Reference Ground Tracks: [ 356 1301]
Cycles: [7 6]
Received 2089613 segments
CPU times: user 5.59 s, sys: 578 ms, total: 6.17 s
Wall time: 10.3 s

Add Granule Track Polylines to Map¶

In [9]:
# restore integer columns that were promoted to object dtype during concat
int_frame = gdf.drop(columns=['geometry'])
for column_name in int_frame.select_dtypes(include='object').columns:
    int_frame[column_name] = int_frame[column_name].astype("int32")
fixed = gpd.GeoDataFrame(int_frame, geometry=gdf.geometry, crs='EPSG:4326')

def _points_to_linestring(points):
    # a LineString needs at least two points; keep singletons as a point list
    return LineString(points.tolist()) if points.size > 1 else points.tolist()

# collapse the points of each track into a single linestring geometry
grouped = fixed.groupby(['track'])['geometry'].apply(_points_to_linestring)
geodata = ipysliderule.ipyleaflet.GeoData(geo_dataframe=gpd.GeoDataFrame(grouped),
    style={'color': 'black', 'opacity':1, 'weight':0.1})
m.map.add_layer(geodata)
In [ ]: