Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: generalized regular expressions for non-entered cases #84

Merged
merged 1 commit into from
Sep 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion icesat2_toolkit/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
import re
import copy
import logging
import pathlib
import warnings
import datetime
import traceback
Expand Down
48 changes: 33 additions & 15 deletions scripts/convert_ICESat2_format.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
convert_ICESat2_format.py
Written by Tyler Sutterley (12/2022)
Written by Tyler Sutterley (09/2023)

Converts ICESat-2 HDF5 datafiles to zarr or rechunked HDF5 datafiles

Expand Down Expand Up @@ -56,6 +56,7 @@
https://pandas.pydata.org/

UPDATE HISTORY:
Updated 09/2023: generalized regular expressions for non-entered cases
Updated 12/2022: single implicit import of altimetry tools
Updated 06/2022: use explicit import of convert functions
Updated 05/2022: use argparse descriptions within sphinx documentation
Expand All @@ -82,7 +83,7 @@
# PURPOSE: convert the ICESat-2 elevation data from HDF5 to zarr
# or rechunked HDF5 formats
def convert_ICESat2_format(DIRECTORY, PRODUCTS, RELEASE, VERSIONS, GRANULES,
TRACKS, YEARS=None, SUBDIRECTORY=None, FORMAT=None, CHUNKS=None,
TRACKS, YEARS=None, SUBDIRECTORY=None, CYCLES=None, FORMAT=None, CHUNKS=None,
PROCESSES=0, CLOBBER=False, VERBOSE=False, MODE=0o775):

# create logger
Expand All @@ -91,20 +92,33 @@ def convert_ICESat2_format(DIRECTORY, PRODUCTS, RELEASE, VERSIONS, GRANULES,

# regular expression operator for finding files of a particular granule
# find ICESat-2 HDF5 files in the subdirectory for product and release
regex_track = '|'.join([rf'{T:04d}' for T in TRACKS])
regex_granule = '|'.join([rf'{G:02d}' for G in GRANULES])
regex_version = '|'.join([rf'{V:02d}' for V in VERSIONS])
file_regex_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})(\d{{2}})'
r'(\d{{2}})(\d{{2}})_({1})(\d{{2}})({2})_({3})_({4})(.*?).(h5)$')
if TRACKS:
regex_track = r'|'.join([rf'{T:04d}' for T in TRACKS])
else:
regex_track = r'\d{4}'
if CYCLES:
regex_cycle = r'|'.join([rf'{C:02d}' for C in CYCLES])
else:
regex_cycle = r'\d{2}'
if GRANULES:
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
else:
regex_granule = r'\d{2}'
if VERSIONS:
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
else:
regex_version = r'\d{2}'
regex_pattern = (r'(processed_)?({0})(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})'
r'(\d{{2}})(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_({5})(.*?).h5$')

# regular expression operator for finding subdirectories
if SUBDIRECTORY:
# convert particular subdirectories for product
R2 = re.compile(r'('+r'|'.join(SUBDIRECTORY)+r')', re.VERBOSE)
elif YEARS:
# convert particular years for product
regex_pattern = '|'.join(rf'{y:d}' for y in YEARS)
R2 = re.compile(rf'({regex_pattern}).(\d+).(\d+)', re.VERBOSE)
regex_years = '|'.join(rf'{y:d}' for y in YEARS)
R2 = re.compile(rf'({regex_years}).(\d+).(\d+)', re.VERBOSE)
else:
# convert all available subdirectories for product
R2 = re.compile(r'(\d+).(\d+).(\d+)', re.VERBOSE)
Expand All @@ -118,8 +132,8 @@ def convert_ICESat2_format(DIRECTORY, PRODUCTS, RELEASE, VERSIONS, GRANULES,
ddir = os.path.join(DIRECTORY,f'{p}.{RELEASE}')
subdirectories = [sd for sd in os.listdir(ddir) if R2.match(sd)]
# compile regular expression operator for product, track, cycle, granule,
# release and version
args = (p,regex_track,regex_granule,RELEASE,regex_version)
R1 = re.compile(file_regex_pattern.format(*args), re.VERBOSE)
args = (p,regex_track,regex_cycle,regex_granule,RELEASE,regex_version)
R1 = re.compile(regex_pattern.format(*args), re.VERBOSE)
# for each subdirectory
for sd in subdirectories:
# find matching files (for product, track, cycle, granule, release, version)
Expand Down Expand Up @@ -257,13 +271,17 @@ def arguments():
help='ICESat-2 Data Release')
# ICESat-2 data version
parser.add_argument('--version','-v',
type=int, nargs='+', default=range(1,10),
type=int, nargs='+',
help='ICESat-2 Data Version')
# ICESat-2 granule region
parser.add_argument('--granule','-g',
metavar='REGION', type=int, nargs='+',
choices=range(1,15), default=range(1,15),
help='ICESat-2 Granule Region')
# ICESat-2 orbital cycle
parser.add_argument('--cycle','-c',
type=int, nargs='+', default=None,
help='ICESat-2 orbital cycles to convert')
# ICESat-2 reference ground tracks
parser.add_argument('--track','-t',
metavar='RGT', type=int, nargs='+',
Expand Down Expand Up @@ -305,9 +323,9 @@ def main():
# convert HDF5 files for each data product
convert_ICESat2_format(args.directory, args.products, args.release,
args.version, args.granule, args.track, YEARS=args.year,
SUBDIRECTORY=args.subdirectory, FORMAT=args.format,
CHUNKS=args.chunks, PROCESSES=args.np, CLOBBER=args.clobber,
VERBOSE=args.verbose, MODE=args.mode)
SUBDIRECTORY=args.subdirectory, CYCLES=args.cycle,
FORMAT=args.format, CHUNKS=args.chunks, PROCESSES=args.np,
CLOBBER=args.clobber, VERBOSE=args.verbose, MODE=args.mode)

# run main program
if __name__ == '__main__':
Expand Down
29 changes: 19 additions & 10 deletions scripts/copy_scf_ICESat2_files.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
copy_scf_ICESat2_files.py
Written by Tyler Sutterley (12/2022)
Written by Tyler Sutterley (09/2023)
Copies ICESat-2 HDF5 files from the SCF server

CALLING SEQUENCE:
Expand Down Expand Up @@ -33,6 +33,7 @@
https://github.com/paramiko/paramiko

UPDATE HISTORY:
Updated 09/2023: generalized regular expressions for non-entered cases
Updated 12/2022: single implicit import of altimetry tools
Updated 05/2022: use argparse descriptions within sphinx documentation
Updated 10/2021: using python logging for handling verbose output
Expand Down Expand Up @@ -105,7 +106,7 @@ def arguments():
help='ICESat-2 data release to copy')
# ICESat-2 data version
parser.add_argument('--version','-v',
type=int, nargs='+', default=range(1,10),
type=int, nargs='+',
help='ICESat-2 data versions to copy')
# ICESat-2 granule region
parser.add_argument('--granule','-g',
Expand All @@ -115,7 +116,6 @@ def arguments():
# ICESat-2 orbital cycle
parser.add_argument('--cycle','-c',
type=int, nargs='+',
default=range(1,10),
help='ICESat-2 orbital cycles to copy')
# ICESat-2 reference ground tracks
parser.add_argument('--track','-t',
Expand Down Expand Up @@ -207,13 +207,22 @@ def copy_scf_files(client, client_ftp, base_dir, scf_incoming, scf_outgoing,
PRODUCT, RELEASE, VERSIONS, GRANULES, CYCLES, TRACKS, CLOBBER=False,
LIST=False, MODE=0o775):
# find ICESat-2 HDF5 files in the subdirectory for product and release
TRACKS = np.arange(1,1388) if not np.any(TRACKS) else TRACKS
CYCLES = np.arange(1,3) if not np.any(CYCLES) else CYCLES
GRANULES = np.arange(1,15) if not np.any(GRANULES) else GRANULES
regex_track = r'|'.join([rf'{T:04d}' for T in TRACKS])
regex_cycle = r'|'.join([rf'{C:02d}' for C in CYCLES])
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
if TRACKS:
regex_track = r'|'.join([rf'{T:04d}' for T in TRACKS])
else:
regex_track = r'\d{4}'
if CYCLES:
regex_cycle = r'|'.join([rf'{C:02d}' for C in CYCLES])
else:
regex_cycle = r'\d{2}'
if GRANULES:
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
else:
regex_granule = r'\d{2}'
if VERSIONS:
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
else:
regex_version = r'\d{2}'
# compile regular expression operator for extracting data from files
args = (PRODUCT,regex_track,regex_cycle,regex_granule,RELEASE,regex_version)
regex_pattern = (r'(processed_)?({0})(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})'
Expand Down
15 changes: 11 additions & 4 deletions scripts/nsidc_icesat2_convert.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
nsidc_icesat2_convert.py
Written by Tyler Sutterley (12/2022)
Written by Tyler Sutterley (09/2023)

Acquires ICESat-2 datafiles from NSIDC and directly converts to
zarr datafiles or rechunked HDF5 files
Expand Down Expand Up @@ -85,6 +85,7 @@
utilities.py: download and management utilities for syncing files

UPDATE HISTORY:
Updated 09/2023: generalized regular expressions for non-entered cases
Updated 12/2022: single implicit import of altimetry tools
Updated 05/2022: use argparse descriptions within sphinx documentation
Updated 03/2022: use attempt login function to check credentials
Expand Down Expand Up @@ -159,8 +160,14 @@ def nsidc_icesat2_convert(DIRECTORY, PRODUCTS, RELEASE, VERSIONS, GRANULES, TRAC
regex_cycle = r'|'.join([rf'{C:02d}' for C in CYCLES])
else:
regex_cycle = r'\d{2}'
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
if GRANULES:
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
else:
regex_granule = r'\d{2}'
if VERSIONS:
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
else:
regex_version = r'\d{2}'
regex_suffix = r'(.*?)' if AUXILIARY else r'(h5)'
remote_regex_pattern=(r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})(\d{{2}})'
r'(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_({5})(.*?).{6}$')
Expand Down Expand Up @@ -441,7 +448,7 @@ def arguments():
help='ICESat-2 Data Release')
# ICESat-2 data version
parser.add_argument('--version','-v',
type=int, nargs='+', default=range(1,10),
type=int, nargs='+',
help='ICESat-2 Data Version')
# ICESat-2 granule region
parser.add_argument('--granule','-g',
Expand Down
15 changes: 11 additions & 4 deletions scripts/nsidc_icesat2_dragann.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
nsidc_icesat2_dragann.py
Written by Tyler Sutterley (12/2022)
Written by Tyler Sutterley (09/2023)

Acquires the ATL03 geolocated photon height product and appends the
ATL08 DRAGANN classifications from NSIDC
Expand Down Expand Up @@ -57,6 +57,7 @@
utilities.py: download and management utilities for syncing files

UPDATE HISTORY:
Updated 09/2023: generalized regular expressions for non-entered cases
Updated 12/2022: single implicit import of altimetry tools
Updated 05/2022: use argparse descriptions within sphinx documentation
Updated 03/2022: use attempt login function to check credentials
Expand Down Expand Up @@ -127,8 +128,14 @@ def nsidc_icesat2_dragann(DIRECTORY, RELEASE, VERSIONS, GRANULES, TRACKS,
regex_cycle = r'|'.join([rf'{C:02d}' for C in CYCLES])
else:
regex_cycle = r'\d{2}'
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
if GRANULES:
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
else:
regex_granule = r'\d{2}'
if VERSIONS:
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
else:
regex_version = r'\d{2}'
regex_suffix = r'(h5)'
remote_regex_pattern=(r'({0})_(\d{{4}})(\d{{2}})(\d{{2}})(\d{{2}})'
r'(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_({5})(.*?).{6}$')
Expand Down Expand Up @@ -410,7 +417,7 @@ def arguments():
help='ICESat-2 Data Release')
# ICESat-2 data version
parser.add_argument('--version','-v',
type=int, nargs='+', default=range(1,10),
type=int, nargs='+',
help='ICESat-2 Data Version')
# ICESat-2 granule region
parser.add_argument('--granule','-g',
Expand Down
15 changes: 11 additions & 4 deletions scripts/nsidc_icesat2_sync.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
nsidc_icesat2_sync.py
Written by Tyler Sutterley (12/2022)
Written by Tyler Sutterley (09/2023)

Acquires ICESat-2 datafiles from the National Snow and Ice Data Center (NSIDC)

Expand Down Expand Up @@ -69,6 +69,7 @@
utilities.py: download and management utilities for syncing files

UPDATE HISTORY:
Updated 09/2023: generalized regular expressions for non-entered cases
Updated 12/2022: single implicit import of altimetry tools
Updated 05/2022: use argparse descriptions within sphinx documentation
Updated 03/2022: use attempt login function to check credentials
Expand Down Expand Up @@ -152,8 +153,14 @@ def nsidc_icesat2_sync(DIRECTORY, PRODUCTS, RELEASE, VERSIONS, GRANULES,
regex_cycle = r'|'.join([rf'{C:02d}' for C in CYCLES])
else:
regex_cycle = r'\d{2}'
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
if GRANULES:
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
else:
regex_granule = r'\d{2}'
if VERSIONS:
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
else:
regex_version = r'\d{2}'
regex_suffix = r'(.*?)' if AUXILIARY else r'(h5|nc)'
default_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})(\d{{2}})'
r'(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_({5})(.*?).{6}$')
Expand Down Expand Up @@ -471,7 +478,7 @@ def arguments():
help='ICESat-2 Data Release')
# ICESat-2 data version
parser.add_argument('--version','-v',
type=int, nargs='+', default=range(1,10),
type=int, nargs='+',
help='ICESat-2 Data Version')
# ICESat-2 granule region
region = parser.add_mutually_exclusive_group(required=False)
Expand Down
15 changes: 11 additions & 4 deletions scripts/nsidc_icesat2_sync_s3.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
u"""
nsidc_icesat2_sync_s3.py
Written by Tyler Sutterley (12/2022)
Written by Tyler Sutterley (09/2023)

Acquires ICESat-2 datafiles from the National Snow and Ice Data Center (NSIDC)
and transfers to an AWS S3 bucket using a local machine as pass through
Expand Down Expand Up @@ -72,6 +72,7 @@
utilities.py: download and management utilities for syncing files

UPDATE HISTORY:
Updated 09/2023: generalized regular expressions for non-entered cases
Updated 12/2022: single implicit import of altimetry tools
Updated 05/2022: use argparse descriptions within sphinx documentation
Updated 03/2022: use attempt login function to check credentials
Expand Down Expand Up @@ -157,8 +158,14 @@ def nsidc_icesat2_sync_s3(aws_access_key_id, aws_secret_access_key,
regex_cycle = r'|'.join([rf'{C:02d}' for C in CYCLES])
else:
regex_cycle = r'\d{2}'
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
if GRANULES:
regex_granule = r'|'.join([rf'{G:02d}' for G in GRANULES])
else:
regex_granule = r'\d{2}'
if VERSIONS:
regex_version = r'|'.join([rf'{V:02d}' for V in VERSIONS])
else:
regex_version = r'\d{2}'
regex_suffix = r'(.*?)' if AUXILIARY else r'(h5|nc)'
default_pattern = (r'{0}(-\d{{2}})?_(\d{{4}})(\d{{2}})(\d{{2}})(\d{{2}})'
r'(\d{{2}})(\d{{2}})_({1})({2})({3})_({4})_({5})(.*?).{6}$')
Expand Down Expand Up @@ -441,7 +448,7 @@ def arguments():
help='ICESat-2 Data Release')
# ICESat-2 data version
parser.add_argument('--version','-v',
type=int, nargs='+', default=range(1,10),
type=int, nargs='+',
help='ICESat-2 Data Version')
# ICESat-2 granule region
region = parser.add_mutually_exclusive_group(required=False)
Expand Down
Loading