From dc65265e5e9135bfd8b951859ee7d1d5e3ebbe96 Mon Sep 17 00:00:00 2001 From: micah johnson Date: Thu, 7 Jul 2022 08:51:25 -0600 Subject: [PATCH 1/7] Expanded flexibility to read datetimes in gpr formats --- scripts/download/nsidc_sources.txt | 1 + scripts/upload/{add_gpr.py => add_bsu_gpr.py} | 0 snowex_db/interpretation.py | 24 +++++++++++++++---- tests/test_interpretation.py | 1 + 4 files changed, 22 insertions(+), 4 deletions(-) rename scripts/upload/{add_gpr.py => add_bsu_gpr.py} (100%) diff --git a/scripts/download/nsidc_sources.txt b/scripts/download/nsidc_sources.txt index a26501a..d6ce759 100644 --- a/scripts/download/nsidc_sources.txt +++ b/scripts/download/nsidc_sources.txt @@ -3,3 +3,4 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_BSU_GPR.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_SP.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SMP.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD.001/ +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv diff --git a/scripts/upload/add_gpr.py b/scripts/upload/add_bsu_gpr.py similarity index 100% rename from scripts/upload/add_gpr.py rename to scripts/upload/add_bsu_gpr.py diff --git a/snowex_db/interpretation.py b/snowex_db/interpretation.py index 1422f77..34928c1 100644 --- a/snowex_db/interpretation.py +++ b/snowex_db/interpretation.py @@ -12,6 +12,7 @@ import pytz from .utilities import get_logger +from.string_management import parse_none def is_point_data(columns): @@ -201,7 +202,7 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'): else: raise ValueError("We did not recieve a valid in_timezone") - # Look for a single header entry for date and time. + # Look for a single header entry containing date and time. for k in data.keys(): kl = k.lower() if 'date' in kl and 'time' in kl: @@ -211,9 +212,18 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'): # If we didn't find date/time combined. 
if d is None: - # Handle SMP data dates and times + # Handle data dates and times if 'date' in keys and 'time' in keys: - dstr = ' '.join([str(data['date']), str(data['time'])]) + # Assume MMDDYY format + if len(data['date']) == 6: + dt = data['date'] + # Put into YYYY-MM-DD + data['date'] = f'20{dt[-2:]}-{dt[0:2]}-{dt[2:4]}' + # Allow for nan time + data['time'] = parse_none(data['time']) + + dstr = ' '.join([str(data[k]) for k in ['date', 'time'] + if data[k] is not None]) d = pd.to_datetime(dstr) elif 'date' in keys: @@ -260,7 +270,13 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'): d.replace(tzinfo=out_tz) data['date'] = d.date() - data['time'] = d.timetz() + + # Don't add a time when the input time was nan or None + if 'time' not in data.keys(): + data['time'] = d.timetz() + else: + if data['time'] is not None: + data['time'] = d.timetz() return data diff --git a/tests/test_interpretation.py b/tests/test_interpretation.py index c22d1a8..6544e4d 100644 --- a/tests/test_interpretation.py +++ b/tests/test_interpretation.py @@ -52,6 +52,7 @@ def test_cardinal_to_degrees_value_error(): time(hour=8, tzinfo=pytz.utc)), ({'date/local standard time': '2019-12-20T13:00'}, 'US/Pacific', date(2019, 12, 20), time(hour=21, minute=0, tzinfo=pytz.utc)), + ({'date': '020620', 'time': 'nan'}, 'US/Mountain', date(2020, 2, 6), None) ]) def test_add_date_time_keys(data, in_tz, expected_date, expected_time): """ From 1b4c5f327bcfe93c102a74a89273346a4fe60d19 Mon Sep 17 00:00:00 2001 From: micah johnson Date: Thu, 7 Jul 2022 08:52:22 -0600 Subject: [PATCH 2/7] Added urls for gpr datasets. 
Added some nice formatting for reading files --- scripts/download/nsidc_sources.txt | 1 + snowex_db/upload.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/download/nsidc_sources.txt b/scripts/download/nsidc_sources.txt index d6ce759..41d8d60 100644 --- a/scripts/download/nsidc_sources.txt +++ b/scripts/download/nsidc_sources.txt @@ -4,3 +4,4 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_SP.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SMP.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD_TLI.001/2019.09.29/SNEX20_SD_TLI_clean.csv \ No newline at end of file diff --git a/snowex_db/upload.py b/snowex_db/upload.py index 8bc041e..ca9467e 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -98,7 +98,7 @@ def _read(self, profile_filename): df['depth'] = new_depth delta = abs(df['depth'].max() - df['depth'].min()) - self.log.info('File contains {} profiles each with {} layers across ' + self.log.info('File contains {} profiles each with {:,} layers across ' '{:0.2f} cm'.format(len(self.hdr.data_names), len(df), delta)) return df @@ -354,7 +354,7 @@ def submit(self, session): for pt in self.hdr.data_names: objects = [] df = self.build_data(pt) - self.log.info('Submitting {} points of {} to the database...'.format( + self.log.info('Submitting {:,} points of {} to the database...'.format( len(df.index), pt)) for i, row in df.iterrows(): @@ -543,7 +543,7 @@ def submit(self, session): if len(tiles) > 1: # -1 because the first element is not a self.log.info( - 'Raster is split into {} tiles for uploading...'.format( + 'Raster is split into {:,} tiles for uploading...'.format( len(tiles))) # Allow for tiling, the first split is always psql statement we don't From f60653424e97509f76d8a19e16b55b07549c3a1d Mon Sep 17 00:00:00 2001 From: micah johnson Date: Thu, 7 Jul 2022 
08:59:28 -0600 Subject: [PATCH 3/7] Added CSU gpr upload --- scripts/upload/add_csu_grp.py | 56 +++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 scripts/upload/add_csu_grp.py diff --git a/scripts/upload/add_csu_grp.py b/scripts/upload/add_csu_grp.py new file mode 100644 index 0000000..cc38bcf --- /dev/null +++ b/scripts/upload/add_csu_grp.py @@ -0,0 +1,56 @@ +""" +Read in the SnowEx 2020 Colorado State GPR data. Upload SWE, Two Way Travel, Depth, to +the database. + +1. Data must be downloaded via sh ../download/download_nsidc.sh +2A. python run.py # To run all together at once +2B. python add_csu_gpr.py # To run individually + +""" + +import time +from os.path import abspath, expanduser, join + +import pandas as pd + +from snowexsql.db import get_db +from snowex_db.upload import * + + +def main(): + file = '../download/data/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv' + + kwargs = { + # Keyword argument to upload depth measurements + 'depth_is_metadata': False, + + # Constant Metadata for the GPR data + 'site_name': 'Grand Mesa', + 'observers': 'Randall Bonnell', + 'instrument': 'pulse EKKO Pro multi-polarization 1 GHz GPR', + 'in_timezone': 'UTC', + 'out_timezone': 'UTC', + 'doi': 'https://doi.org/10.5067/S5EGFLCIAB18' + } + + # Break out the path and make it an absolute path + file = abspath(expanduser(file)) + + # Grab a db connection to a local db named snowex + db_name = 'localhost/snowex' + engine, session = get_db(db_name, credentials='./credentials.json') + + # Instantiate the point uploader + csv = PointDataCSV(file, **kwargs) + # Push it to the database + csv.submit(session) + + # Close out the session with the DB + session.close() + + # Return the number of errors so run.py can report it + return len(csv.errors) + + +if __name__ == '__main__': + main() From 6083064c5b850cf6729a43bc19b0b006719899d5 Mon Sep 17 00:00:00 2001 From: micah johnson Date: Thu, 7 Jul 2022 09:30:04 -0600 
Subject: [PATCH 4/7] Added some met renames to meet new gpr dataset needs, fixed a url mixup in the nsidc sources, added a UNM gpr dataset --- scripts/download/nsidc_sources.txt | 2 +- .../upload/{add_csu_grp.py => add_csu_gpr.py} | 0 scripts/upload/add_unm_gpr.py | 69 +++++++++++++++++++ snowex_db/metadata.py | 19 +++-- tests/test_interpretation.py | 6 +- 5 files changed, 87 insertions(+), 9 deletions(-) rename scripts/upload/{add_csu_grp.py => add_csu_gpr.py} (100%) create mode 100644 scripts/upload/add_unm_gpr.py diff --git a/scripts/download/nsidc_sources.txt b/scripts/download/nsidc_sources.txt index 41d8d60..7229435 100644 --- a/scripts/download/nsidc_sources.txt +++ b/scripts/download/nsidc_sources.txt @@ -4,4 +4,4 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_SP.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SMP.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv -https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD_TLI.001/2019.09.29/SNEX20_SD_TLI_clean.csv \ No newline at end of file +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv diff --git a/scripts/upload/add_csu_grp.py b/scripts/upload/add_csu_gpr.py similarity index 100% rename from scripts/upload/add_csu_grp.py rename to scripts/upload/add_csu_gpr.py diff --git a/scripts/upload/add_unm_gpr.py b/scripts/upload/add_unm_gpr.py new file mode 100644 index 0000000..fa93fdd --- /dev/null +++ b/scripts/upload/add_unm_gpr.py @@ -0,0 +1,69 @@ +""" +Read in the SnowEx 2020 UNM GPR. Upload SWE, Two Way Travel, Depth, to +the database. + +1. Data must be downloaded via sh ../download/download_nsidc.sh +2A. python run.py # To run all together all at once +2B. 
python add_unm_gpr.py # To run individually + +""" + +import time +from os.path import abspath, expanduser, join + +import pandas as pd + +from snowexsql.db import get_db +from snowex_db.upload import * + + +def main(): + file = '../download/data/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv' + + kwargs = { + # Keyword argument to upload depth measurements + 'depth_is_metadata': False, + + # Constant Metadata for the GPR data + 'site_name': 'Grand Mesa', + 'observers': 'Ryan Webb', + 'instrument': 'Mala 800 MHz GPR', + 'in_timezone': 'UTC', + 'out_timezone': 'UTC', + 'doi': 'https://doi.org/10.5067/WE9GI1GVMQF6' + } + + # Break out the path and make it an absolute path + file = abspath(expanduser(file)) + + # Grab a db connection to a local db named snowex + db_name = 'localhost/snowex' + engine, session = get_db(db_name, credentials='./credentials.json') + + # Instantiate the point uploader + csv = PointDataCSV(file, **kwargs) + df_original = csv.df.copy() + + # Convert depth to centimeters + csv.log.info('Converting depth to centimeters...') + df_original['depth'] = df_original['depth'].mul(100) + + # Loop over the two insturments in the file and separate them for two submissions + for hz in ['800', '1600']: + # Change the instrument. 
+ csv.hdr.info['instrument'] = f'Mala {hz} MHz GPR', + csv.log.info(f'Isolating {csv.hdr.info["instrument"]} data.') + csv.df = df_original[df_original['freq_mhz'] == hz].copy() + + # Push it to the database + csv.submit(session) + + # Close out the session with the DB + session.close() + + # Return the number of errors so run.py can report it + return len(csv.errors) + + +if __name__ == '__main__': + main() diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index ac26270..5c2f041 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -321,13 +321,18 @@ class DataHeader(object): 'long': 'longitude', 'lon': 'longitude', 'twt': 'two_way_travel', + 'twt_ns': 'two_way_travel', 'utmzone': 'utm_zone', 'measurement_tool': 'instrument', 'avgdensity': 'density', 'avg_density': 'density', 'dielectric_constant': 'permittivity', 'flag': 'flags', - 'hs': 'depth' + 'hs': 'depth', + 'swe_mm': 'swe', + 'depth_m': 'depth', + 'date_dd_mmm_yy': 'date', + 'time_gmt': 'time' } # Known possible profile types anything not in here will throw an error @@ -339,12 +344,12 @@ class DataHeader(object): # Defaults to keywords arguments defaults = { - 'in_timezone': None, - 'out_timezone': 'UTC', - 'epsg': None, - 'header_sep': ',', - 'northern_hemisphere': True, - 'depth_is_metadata': True} + 'in_timezone': None, + 'out_timezone': 'UTC', + 'epsg': None, + 'header_sep': ',', + 'northern_hemisphere': True, + 'depth_is_metadata': True} def __init__(self, filename, **kwargs): """ diff --git a/tests/test_interpretation.py b/tests/test_interpretation.py index 6544e4d..7af2604 100644 --- a/tests/test_interpretation.py +++ b/tests/test_interpretation.py @@ -52,7 +52,11 @@ def test_cardinal_to_degrees_value_error(): time(hour=8, tzinfo=pytz.utc)), ({'date/local standard time': '2019-12-20T13:00'}, 'US/Pacific', date(2019, 12, 20), time(hour=21, minute=0, tzinfo=pytz.utc)), - ({'date': '020620', 'time': 'nan'}, 'US/Mountain', date(2020, 2, 6), None) + # Test CSU GPR time + ({'date': 
'020620', 'time': 'nan'}, 'US/Mountain', date(2020, 2, 6), None), + # Test Mala GPR time + ({'date': '28-Jan-20', 'time': '16:07'}, 'UTC', date(2020, 1, 28), time(16,7,tzinfo=pytz.utc)), + ]) def test_add_date_time_keys(data, in_tz, expected_date, expected_time): """ From 029e3387694513c7fe5f3f7d7d70f9f8942a6fa3 Mon Sep 17 00:00:00 2001 From: micah johnson Date: Thu, 7 Jul 2022 13:05:49 -0600 Subject: [PATCH 5/7] Added a test to ensure we capture the special header nuances --- snowex_db/metadata.py | 3 ++- tests/test_metadata.py | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 5c2f041..09217ec 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -332,7 +332,8 @@ class DataHeader(object): 'swe_mm': 'swe', 'depth_m': 'depth', 'date_dd_mmm_yy': 'date', - 'time_gmt': 'time' + 'time_gmt': 'time', + 'elev_m': 'elevation' } # Known possible profile types anything not in here will throw an error diff --git a/tests/test_metadata.py b/tests/test_metadata.py index 75a18ce..0edef54 100644 --- a/tests/test_metadata.py +++ b/tests/test_metadata.py @@ -279,6 +279,26 @@ def setup_class(self): super().setup_class(self) +class TestUNMGPRHeader(DataHeaderTestBase): + """ + Test the header information can be interpreted correctly in the UNM GPR data + """ + depth_is_metadata = False + + def setup_class(self): + self.file = 'unm_gpr.csv' + self.data_names = ['depth', 'swe', 'two_way_travel'] + self.columns = ['date', 'time', 'utm_zone', 'easting', 'latitude','longitude', + 'northing', 'elevation', 'freq_mhz'] + self.data_names + + self.multi_sample_profiles = [] + + # no header in the GPR file + self.info = {} + + super().setup_class(self) + + class TestSMPHeader(DataHeaderTestBase): """ Test interpreting an SMP header without a SMP Log file From cc35a6cf9d765c29f55d71daaa6c6e13b5cf9402 Mon Sep 17 00:00:00 2001 From: micah johnson Date: Thu, 7 Jul 2022 13:17:13 -0600 Subject: [PATCH 
6/7] Final versions of new gpr datasets with variable instruments. Fixed #2 --- scripts/upload/add_csu_gpr.py | 3 ++- scripts/upload/add_unm_gpr.py | 30 ++++++++++++++++++------------ 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/scripts/upload/add_csu_gpr.py b/scripts/upload/add_csu_gpr.py index cc38bcf..b2c30da 100644 --- a/scripts/upload/add_csu_gpr.py +++ b/scripts/upload/add_csu_gpr.py @@ -30,7 +30,8 @@ def main(): 'instrument': 'pulse EKKO Pro multi-polarization 1 GHz GPR', 'in_timezone': 'UTC', 'out_timezone': 'UTC', - 'doi': 'https://doi.org/10.5067/S5EGFLCIAB18' + 'doi': 'https://doi.org/10.5067/S5EGFLCIAB18', + 'epsg': 26912 } # Break out the path and make it an absolute path diff --git a/scripts/upload/add_unm_gpr.py b/scripts/upload/add_unm_gpr.py index fa93fdd..2d2b32e 100644 --- a/scripts/upload/add_unm_gpr.py +++ b/scripts/upload/add_unm_gpr.py @@ -18,7 +18,7 @@ def main(): - file = '../download/data/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv' + filename = '../download/data/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv' kwargs = { # Keyword argument to upload depth measurements @@ -27,34 +27,40 @@ def main(): # Constant Metadata for the GPR data 'site_name': 'Grand Mesa', 'observers': 'Ryan Webb', - 'instrument': 'Mala 800 MHz GPR', + 'instrument': None, # See loop below 'in_timezone': 'UTC', 'out_timezone': 'UTC', - 'doi': 'https://doi.org/10.5067/WE9GI1GVMQF6' + 'doi': 'https://doi.org/10.5067/WE9GI1GVMQF6', + 'epsg': 26912 } # Break out the path and make it an absolute path - file = abspath(expanduser(file)) + filename = abspath(expanduser(filename)) # Grab a db connection to a local db named snowex db_name = 'localhost/snowex' engine, session = get_db(db_name, credentials='./credentials.json') + # Read in for management of instruments + df_raw = pd.read_csv(filename) + low_freq = df_raw['FREQ_MHz'] == 800 + hi_freq = df_raw['FREQ_MHz'] == 1600 + # Instantiate the point uploader - csv = PointDataCSV(file, 
**kwargs) - df_original = csv.df.copy() + csv = PointDataCSV(filename, **kwargs) # Convert depth to centimeters csv.log.info('Converting depth to centimeters...') - df_original['depth'] = df_original['depth'].mul(100) + csv.df['depth'] = csv.df['depth'].mul(100) + df_original = csv.df.copy() # Loop over the two insturments in the file and separate them for two submissions - for hz in ['800', '1600']: + for hz, ind in [(800, low_freq), (1600, hi_freq)]: + instrument = f'Mala {hz} MHz GPR' + csv.log.info(f'Isolating {instrument} data for upload.') + csv.df = df_original[ind].copy() # Change the instrument. - csv.hdr.info['instrument'] = f'Mala {hz} MHz GPR', - csv.log.info(f'Isolating {csv.hdr.info["instrument"]} data.') - csv.df = df_original[df_original['freq_mhz'] == hz].copy() - + csv.df['instrument'] = instrument # Push it to the database csv.submit(session) From 82c655a4f83a69b37d51af145fc119b59803690b Mon Sep 17 00:00:00 2001 From: micah johnson Date: Thu, 7 Jul 2022 13:25:51 -0600 Subject: [PATCH 7/7] Added test file for unm gpr header --- tests/data/unm_gpr.csv | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 tests/data/unm_gpr.csv diff --git a/tests/data/unm_gpr.csv b/tests/data/unm_gpr.csv new file mode 100644 index 0000000..0be8121 --- /dev/null +++ b/tests/data/unm_gpr.csv @@ -0,0 +1,7 @@ +DATE_dd_mmm_yy,TIME_GMT,FREQ_MHz,LONG,LAT,ELEV_m,NORTHING,EASTING,UTMzone,TWT_ns,DEPTH_m,SWE_mm +28-Jan-20,16:43,800,-108.1340183,39.0296597,3157.1,4323978.711,748088.3947,12,8.97,1.09,299 +29-Jan-20,20:35,800,-108.139506,39.014194,3106.94,4322247.041,747667.2868,12,7.34,0.9,244 +30-Jan-20,22:20,800,-108.1622217,39.03095167,3110.8,4324045.489,745642.1231,12,9.55,1.17,318 +31-Jan-20,20:04,800,-108.1762593,39.02676284,3090.19,4323542.671,744441.2436,12,8.73,1.07,291 +4-Feb-20,16:00,1600,-108.1391578,39.03117433,3141.17,4324132.814,747638.1351,12,7.27,0.89,242 +5-Feb-20,19:04,1600,-108.1715773,39.02401467,3103.3,4323250.231,744856.1064,12,6.15,0.75,205