SlideRule CMR Debug¶
A simple tool for checking CMR queries
- Creates a leaflet map for drawing regions of interest
- Queries CMR for granules and granule polygons
- Plots granule polygons on map
- Retrieves and plots granule tracks on map
Jupyter and SlideRule¶
Jupyter widgets are used to set parameters for the SlideRule API.
Regions of interest for submitting to SlideRule are drawn on an ipyleaflet map.
Load necessary packages¶
In [1]:
import re
import time
import logging
import posixpath
import numpy as np
import geopandas as gpd
import concurrent.futures
import matplotlib.cm as cm
import matplotlib.colors as colors
import ipywidgets as widgets
from shapely.geometry import LineString
from sliderule import sliderule, icesat2, ipysliderule, earthdata, h5
import sliderule.io
# autoreload
%load_ext autoreload
%autoreload 2
/home/jswinski/miniconda3/envs/sliderule_env/lib/python3.12/site-packages/h5py/__init__.py:36: UserWarning: h5py is running against HDF5 1.14.3 when it was built against 1.14.2, this may cause problems _warn(("h5py is running against HDF5 {0} when it was built against {1}, "
In [2]:
# Connect to the SlideRule service and confirm the client/server versions
icesat2.init("slideruleearth.io", organization="bathy", loglevel=logging.WARNING)
sliderule.get_version()
# widgets controlling the ICESat-2 query parameters
# and the interactive map projection
SRwidgets = ipysliderule.widgets()
parameter_controls = [
    SRwidgets.product,
    SRwidgets.release,
    SRwidgets.start_date,
    SRwidgets.end_date,
    SRwidgets.projection,
]
widgets.VBox(parameter_controls)
Out[2]:
VBox(children=(Dropdown(description='Product:', options=('ATL03', 'ATL06', 'ATL08'), tooltip='Product: ICESat-…
Select regions of interest for querying CMR¶
In [3]:
# create ipyleaflet map in specified projection
# NOTE: `m` is reused by later cells (regions drawn on it feed the CMR query,
# and granule polygons/tracks are added back onto it as layers)
m = ipysliderule.leaflet(SRwidgets.projection.value)
# display the map widget (last expression in the cell)
m.map
Out[3]:
Map(center=[39, -108], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…
Build and transmit CMR requests¶
In [4]:
%%time
# for each region of interest
granule_list = []
granule_polygons = []
for poly in m.regions:
# polygon from map
resources,metadata = earthdata.cmr(polygon=poly,
short_name=SRwidgets.product.value,
time_start=SRwidgets.time_start,
time_end=SRwidgets.time_end,
version=SRwidgets.release.value,
return_metadata=True)
# for each granule resource
for i,resource in enumerate(resources):
granule_list.append(resource)
granule_polygons.append(metadata[i].geometry)
# print list of granules
num_granules = len(granule_list)
logging.info('Number of Granules: {0:d}'.format(num_granules))
logging.debug(granule_list)
CPU times: user 138 ms, sys: 0 ns, total: 138 ms Wall time: 594 ms
Select Granules to Plot on Map¶
In [5]:
# multi-select widget listing every granule returned by the CMR query
granule_select = widgets.SelectMultiple(
    description='Granules:',
    options=granule_list,
    disabled=False,
    layout=widgets.Layout(width='35%', height='200px'),
)
display(granule_select)
SelectMultiple(description='Granules:', layout=Layout(height='200px', width='35%'), options=('ATL03_2018102010…
Add granule polygons to map¶
In [6]:
# indices of the granules highlighted in the selection widget
# (also used below when retrieving granules from S3)
granule_indices = list(granule_select.index)
# one viridis color per selected granule
color_table = cm.viridis(np.linspace(0, 1, len(granule_indices)))
for g, rgba in zip(granule_indices, color_table):
    hex_color = colors.to_hex(rgba)
    layer = ipysliderule.ipyleaflet.GeoJSON(
        data=granule_polygons[g].__geo_interface__,
        style={
            'color': hex_color,
            'fill_color': hex_color,
            'opacity': 0.8,
            'weight': 1,
        }
    )
    m.map.add_layer(layer)
Get Granules from NSIDC S3¶
In [7]:
def s3_retrieve(granule, **kwargs):
    """Retrieve geolocation variables for an ICESat-2 granule from S3.

    Reads the per-beam segment variables for an ATL03/ATL06/ATL08 granule,
    flattens them into a single GeoDataFrame, and derives track and global
    reference-point columns.

    Parameters
    ----------
    granule : str
        ICESat-2 granule filename (e.g. ``ATL03_..._005_01.h5``)
    **kwargs
        asset : str, default ``'icesat2'``
            SlideRule asset to read the HDF5 data from
        index_key : str, default ``'time'``
            column used as the index of the returned dataframe
        polygon : GeoDataFrame or None, default ``None``
            if given, intersect results with this geometry (in its CRS)

    Returns
    -------
    geopandas.GeoDataFrame in EPSG:4326; an empty frame when no beam
    could be read.

    Raises
    ------
    ValueError
        if the granule name cannot be parsed or the product is unsupported
    """
    # set default keyword arguments
    kwargs.setdefault('asset', 'icesat2')
    kwargs.setdefault('index_key', 'time')
    kwargs.setdefault('polygon', None)
    # regular expression operator for extracting information from files
    rx = re.compile(r'(ATL\d{2})(-\d{2})?_(\d{4})(\d{2})(\d{2})(\d{2})'
        r'(\d{2})(\d{2})_(\d{4})(\d{2})(\d{2})_(\d{3})_(\d{2})(.*?).h5$')
    # extract parameters from ICESat-2 granule
    matches = rx.findall(granule)
    if not matches:
        # fail loudly instead of the cryptic IndexError from .pop()
        raise ValueError(f'Could not parse ICESat-2 granule name: {granule}')
    PRD,HEM,YY,MM,DD,HH,MN,SS,TRK,CYCL,GRN,RL,VRS,AUX = matches.pop()
    # variables of interest for each supported product
    if (PRD == 'ATL03'):
        segment_group = "geolocation"
        segment_key = 'segment_id'
        lon_key = 'reference_photon_lon'
        lat_key = 'reference_photon_lat'
        vnames = ['segment_id','delta_time','reference_photon_lat',
            'reference_photon_lon']
    elif (PRD == 'ATL06'):
        segment_group = "land_ice_segments"
        segment_key = 'segment_id'
        lon_key = 'longitude'
        lat_key = 'latitude'
        vnames = ['segment_id','delta_time','latitude','longitude']
    elif (PRD == 'ATL08'):
        segment_group = "land_segments"
        segment_key = 'segment_id_beg'
        lon_key = 'longitude'
        lat_key = 'latitude'
        vnames = ['segment_id_beg','segment_id_end','delta_time',
            'latitude','longitude']
    else:
        # previously fell through to an UnboundLocalError below
        raise ValueError(f'Unsupported product: {PRD}')
    # for each valid beam within the HDF5 file
    frames = []
    gt = dict(gt1l=10,gt1r=20,gt2l=30,gt2r=40,gt3l=50,gt3r=60)
    atlas_sdp_epoch = np.datetime64('2018-01-01T00:00:00')
    kwds = dict(startrow=0,numrows=-1)
    for gtx in ['gt1l','gt1r','gt2l','gt2r','gt3l','gt3r']:
        geodatasets = [dict(dataset=f'{gtx}/{segment_group}/{v}',**kwds) for v in vnames]
        try:
            # get datasets from s3
            hidatasets = h5.h5p(geodatasets, granule, kwargs['asset'])
            # copy to new "flattened" dictionary
            data = {posixpath.basename(key):var for key,var in hidatasets.items()}
            # Generate Time Column
            delta_time = (data['delta_time']*1e9).astype('timedelta64[ns]')
            data['time'] = gpd.pd.to_datetime(atlas_sdp_epoch + delta_time)
        except Exception:
            # best-effort: a beam may be missing from the granule;
            # skip it but leave a trace for debugging
            logging.debug('Could not read beam %s of %s', gtx, granule)
        else:
            # copy filename parameters
            data['rgt'] = [int(TRK)]*len(data['delta_time'])
            data['cycle'] = [int(CYCL)]*len(data['delta_time'])
            data['gt'] = [gt[gtx]]*len(data['delta_time'])
            # pandas dataframe from compiled dictionary
            frames.append(gpd.pd.DataFrame.from_dict(data))
    # concatenate pandas dataframes; pd.concat raises ValueError
    # when no beam could be read
    try:
        df = gpd.pd.concat(frames)
    except ValueError:
        return sliderule.emptyframe()
    # convert to a GeoDataFrame
    geometry = gpd.points_from_xy(df[lon_key], df[lat_key])
    gdf = gpd.GeoDataFrame(df.drop(columns=[lon_key,lat_key]),
        geometry=geometry,crs='EPSG:4326')
    # create global track variable
    track = 6*(gdf['rgt']-1) + (gdf['gt']/10)
    gdf = gdf.assign(track=track)
    # calculate global reference point
    global_ref_pt = 6*1387*gdf[segment_key] + track
    gdf = gdf.assign(global_ref_pt=global_ref_pt)
    # sort values for reproducible output despite async processing
    gdf.set_index(kwargs['index_key'], inplace=True)
    gdf.sort_index(inplace=True)
    # remove duplicate points
    gdf = gdf[~gdf.index.duplicated()]
    # intersect with geometry in projected reference system
    if kwargs['polygon'] is not None:
        gdf = gpd.overlay(gdf.to_crs(kwargs['polygon'].crs),
            kwargs['polygon'], how='intersection')
    # convert back to original coordinate reference system
    return gdf.to_crs('EPSG:4326')
In [8]:
%%time
results = []
# granule resources for selected segments
perf_start = time.perf_counter()
gdf = sliderule.emptyframe()
num_servers, _ = sliderule.update_available_servers()
with concurrent.futures.ThreadPoolExecutor(max_workers=num_servers) as executor:
futures = [executor.submit(s3_retrieve, granule_list[g]) for g in granule_indices]
# Wait for Results
for future in concurrent.futures.as_completed(futures):
# append to dataframe
results.append(future.result())
gdf = gpd.pd.concat(results)
# Display Statistics
print("Reference Ground Tracks: {}".format(gdf["rgt"].unique()))
print("Cycles: {}".format(gdf["cycle"].unique()))
print("Received {} segments".format(gdf.shape[0]))
Reference Ground Tracks: [ 356 1301] Cycles: [7 6] Received 2089613 segments CPU times: user 5.59 s, sys: 578 ms, total: 6.17 s Wall time: 10.3 s
Add Granule Track Polylines to Map¶
In [9]:
# restore integer dtypes that were upcast to object during concatenation
fixed = gdf.drop(columns=['geometry'])
object_columns = fixed.select_dtypes(include='object').columns
for column in object_columns:
    fixed[column] = fixed[column].astype("int32")
fixed = gpd.GeoDataFrame(fixed, geometry=gdf.geometry, crs='EPSG:4326')
# join the points of each track into a single polyline
# (single-point tracks are kept as a list of points)
grouped = fixed.groupby(['track'])['geometry'].apply(
    lambda geom: LineString(geom.tolist()) if geom.size > 1 else geom.tolist())
geodata = ipysliderule.ipyleaflet.GeoData(
    geo_dataframe=gpd.GeoDataFrame(grouped),
    style={'color': 'black', 'opacity': 1, 'weight': 0.1})
m.map.add_layer(geodata)
In [ ]: