From a6e0b01e14bd06ea1e07a57912778c4cc8d6d637 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 12:28:32 -0600 Subject: [PATCH 01/30] Issue #18 - start of updating the datasource for 2020 timeseries pits and some todos inthe file --- ...{add_time_series_pits.py => add_time_series_pits_2020.py} | 5 +++++ 1 file changed, 5 insertions(+) rename scripts/upload/{add_time_series_pits.py => add_time_series_pits_2020.py} (92%) diff --git a/scripts/upload/add_time_series_pits.py b/scripts/upload/add_time_series_pits_2020.py similarity index 92% rename from scripts/upload/add_time_series_pits.py rename to scripts/upload/add_time_series_pits_2020.py index da272b9..652e058 100644 --- a/scripts/upload/add_time_series_pits.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -20,12 +20,17 @@ def main(): Currenltly based on the preliminary downloaded zip which has not been submitted yet. Folder name is SNEX20_TS_SP_preliminary_v4 """ + # TODO: write script to clear out the timeseries pits + # * maybe delete all pits and then add them back in + # TODO: fill in this DOI doi = None debug = True # Point to the downloaded data from + # TODO: update local path data_dir = abspath('../download/data/SNEX20_TS_SP_preliminary_v5/') # read in the descriptor file + # TODO: check this path desc_df = pd.read_csv(join(data_dir, 'SNEX20_TS_SP_Summary_Environment_v01.csv')) error_msg = [] From aeac5d5bbed5f0b25602cd78be4a292b3822ead8 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 12:34:43 -0600 Subject: [PATCH 02/30] new sources --- scripts/download/nsidc_sources.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/download/nsidc_sources.txt b/scripts/download/nsidc_sources.txt index 35d2b2e..43639ea 100644 --- a/scripts/download/nsidc_sources.txt +++ b/scripts/download/nsidc_sources.txt @@ -6,3 +6,5 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD_TLI.001/2019.09.29/SNEX20_SD_TLI_clean.csv +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_TS_SP.002/ +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX21_TS_SP.001/ From be74fc2b02ce87c86176541c7bbd73b2deb2db80 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 13:31:43 -0600 Subject: [PATCH 03/30] issue #18 working towards modified 2020 timeseries pits upload script --- requirements.txt | 2 +- scripts/upload/add_time_series_pits_2020.py | 143 +++++++++++++------- snowex_db/__init__.py | 13 ++ 3 files changed, 107 insertions(+), 51 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9528b37..38900b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ wheel>0.34.0, <0.35.0 -snowexsql>=0.3.0, <0.4.0 +snowexsql>=0.4.1, <0.5.0 snowmicropyn matplotlib>=3.2.2, <3.3.0 moto==3.1.11 diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index 652e058..529fa35 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -3,12 +3,14 @@ """ import glob +import re from os.path import abspath, join -import pandas as pd +from pathlib import Path from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch from snowex_db.upload import PointDataCSV -from snowexsql.db import get_db +from snowex_db import db_session + tz_map = {'US/Pacific': ['CA', 'NV', 
'WA'], 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], @@ -22,59 +24,100 @@ def main(): """ # TODO: write script to clear out the timeseries pits # * maybe delete all pits and then add them back in - # TODO: fill in this DOI - doi = None + # Version 2 DOI + # https://nsidc.org/data/snex20_ts_sp/versions/2 + doi = "https://doi.org/10.5067/KZ43HVLZV6G4" debug = True + # TODO: new header of + # Pit Comments + # Parameter Codes + # Point to the downloaded data from - # TODO: update local path - data_dir = abspath('../download/data/SNEX20_TS_SP_preliminary_v5/') - # read in the descriptor file - # TODO: check this path - desc_df = pd.read_csv(join(data_dir, 'SNEX20_TS_SP_Summary_Environment_v01.csv')) + data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') error_msg = [] - # get unique site_ids - site_ids = desc_df['PitID'].unique() - - for site_id in site_ids: - abbrev = site_id[0:2] - tz = [k for k, states in tz_map.items() if abbrev in states][0] - - # Grab all the csvs in the pits folder - filenames = glob.glob(join(data_dir, 'pits', f'{site_id}*/*.csv')) - - # Grab all the site details files - sites = glob.glob(join(data_dir, 'pits', f'{site_id}*/*site*.csv')) - - # Grab all the perimeter depths and remove them for now. - perimeter_depths = glob.glob(join(data_dir, 'pits', f'{site_id}*/*perimeter*.csv')) - - # Remove the site details from the total file list to get only the - profiles = list(set(filenames) - set(sites) - set(perimeter_depths)) - - # Submit all profiles associated with pit at a time - b = UploadProfileBatch( - filenames=profiles, - debug=debug, doi=doi, - in_timezone=tz) - b.push() - error_msg += b.errors - - # Upload the site details - sd = UploadSiteDetailsBatch(filenames=sites, - debug=debug, - doi=doi, - in_timezone=tz) - sd.push() - error_msg += sd.errors - - # Submit all perimeters as point data - engine, session = get_db('localhost/snowex', credentials='credentials.json') - for fp in perimeter_depths: - pcsv = PointDataCSV(fp, doi=doi, debug=debug, depth_is_metadata=False, in_timezone=tz) - pcsv.submit(session) - session.close() + # OLD file name + # pits/CAAMCL_20191220_1300/SNEX20_TS_SP_20191220_1300_CAAMCL_LWC_v01.csv + # NEW FILE NAME + # 2019.10.24/SNEX20_TS_SP_20191024_1322_COFEJ2_data_LWC_v02.csv + + # Get all the date folders + unique_dt_olders = Path(data_dir).glob("20*.*.*") + for udf in unique_dt_olders: + # get all the csvs in the folder + dt_folder_files = list(udf.glob("*.csv")) + all_file_names = [f.name for f in dt_folder_files] + site_ids = [] + # Get the unique site ids for this date folder + compiled = re.compile( + r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv' + ) + for file_name in all_file_names: + match = compiled.match(file_name) + if match: + code = match.group(1) + site_ids.append(code) + else: + raise RuntimeError(f"No site ID found for {file_name}") + + # Get the unique site ids + site_ids = list(set(site_ids)) + + for site_id in site_ids: + abbrev = site_id[0:2] + tz = [k for k, states in tz_map.items() if abbrev in states][0] + + # Grab all the csvs in the pits folder + filenames = glob.glob(join(str(udf), f'*_{site_id}_*.csv')) + + # Grab all the site details files + sites = glob.glob(join( + data_dir, 'pits', f'*_{site_id}_*siteDetails*.csv' + )) + + # Grab all the perimeter depths and remove them for now. 
+ perimeter_depths = glob.glob(join( + data_dir, 'pits', f'*_{site_id}_*perimeterDepths*.csv' + )) + + # all non-gapped filled_density + non_filled_density = glob.glob(join( + data_dir, 'pits', f'*_{site_id}_*_density_*.csv' + )) + + # Remove the site details from the total file list to get only the + profiles = list( + set(filenames) - set(sites) - set(perimeter_depths) - + set(non_filled_density) # remove non-gap-filled denisty + ) + + # Submit all profiles associated with pit at a time + b = UploadProfileBatch( + filenames=profiles, + debug=debug, doi=doi, + in_timezone=tz) + b.push() + error_msg += b.errors + + # Upload the site details + sd = UploadSiteDetailsBatch(filenames=sites, + debug=debug, + doi=doi, + in_timezone=tz) + sd.push() + error_msg += sd.errors + + # Submit all perimeters as point data + with db_session( + 'localhost/snowex', credentials='credentials.json' + ) as session: + for fp in perimeter_depths: + pcsv = PointDataCSV( + fp, doi=doi, debug=debug, depth_is_metadata=False, + in_timezone=tz + ) + pcsv.submit(session) for f, m in error_msg: print(f) diff --git a/snowex_db/__init__.py b/snowex_db/__init__.py index 5f4adc5..5820abe 100644 --- a/snowex_db/__init__.py +++ b/snowex_db/__init__.py @@ -2,3 +2,16 @@ __author__ = """Micah Johnson""" __version__ = '0.1.0' + +from snowexsql.db import get_db +from snowexsql.api import DB_NAME +from contextlib import contextmanager + + +@contextmanager +def db_session(db_name, credentials): + # use default_name + db_name = db_name or DB_NAME + engine, session = get_db(db_name, credentials=credentials) + yield session, engine + session.close() From 8372291f092ebc79ba0b3d03afa7d0162cf32765 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 13:34:17 -0600 Subject: [PATCH 04/30] path logic --- scripts/upload/add_time_series_pits_2020.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index 529fa35..de94db5 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -43,7 +43,9 @@ def main(): # 2019.10.24/SNEX20_TS_SP_20191024_1322_COFEJ2_data_LWC_v02.csv # Get all the date folders - unique_dt_olders = Path(data_dir).glob("20*.*.*") + unique_dt_olders = Path( + data_dir + ).expanduser().absolute().glob("20*.*.*") for udf in unique_dt_olders: # get all the csvs in the folder dt_folder_files = list(udf.glob("*.csv")) @@ -73,17 +75,17 @@ def main(): # Grab all the site details files sites = glob.glob(join( - data_dir, 'pits', f'*_{site_id}_*siteDetails*.csv' + str(udf), f'*_{site_id}_*siteDetails*.csv' )) # Grab all the perimeter depths and remove them for now. 
perimeter_depths = glob.glob(join( - data_dir, 'pits', f'*_{site_id}_*perimeterDepths*.csv' + str(udf), f'*_{site_id}_*perimeterDepths*.csv' )) # all non-gapped filled_density non_filled_density = glob.glob(join( - data_dir, 'pits', f'*_{site_id}_*_density_*.csv' + str(udf), f'*_{site_id}_*_density_*.csv' )) # Remove the site details from the total file list to get only the From 3050ce798c2e2c0ef88a505b6d0c9122ad7d215f Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 14:02:32 -0600 Subject: [PATCH 05/30] make sure to not use gap filled density at this point --- scripts/upload/add_time_series_pits_2020.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index de94db5..b98e961 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -37,11 +37,6 @@ def main(): data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') error_msg = [] - # OLD file name - # pits/CAAMCL_20191220_1300/SNEX20_TS_SP_20191220_1300_CAAMCL_LWC_v01.csv - # NEW FILE NAME - # 2019.10.24/SNEX20_TS_SP_20191024_1322_COFEJ2_data_LWC_v02.csv - # Get all the date folders unique_dt_olders = Path( data_dir @@ -84,14 +79,14 @@ def main(): )) # all non-gapped filled_density - non_filled_density = glob.glob(join( - str(udf), f'*_{site_id}_*_density_*.csv' + gap_filled_density = glob.glob(join( + str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' )) # Remove the site details from the total file list to get only the profiles = list( set(filenames) - set(sites) - set(perimeter_depths) - - set(non_filled_density) # remove non-gap-filled denisty + set(gap_filled_density) # remove gap-filled denisty ) # Submit all profiles associated with pit at a time From d852f9857cf32c63a05fdcb63fdc304ee0904226 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 14:09:16 -0600 Subject: [PATCH 06/30] Issue #18 - file for 2021 timeseries pits --- scripts/upload/add_time_series_pits_2021.py | 124 ++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 scripts/upload/add_time_series_pits_2021.py diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py new file mode 100644 index 0000000..c5356d8 --- /dev/null +++ b/scripts/upload/add_time_series_pits_2021.py @@ -0,0 +1,124 @@ +""" +Script to upload the Snowex Time Series pits +""" + +import glob +import re +from os.path import abspath, join +from pathlib import Path + +from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch +from snowex_db.upload import PointDataCSV +from snowex_db import db_session + + +tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], + 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], + } + + +def main(): + """ + Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
+ Folder name is SNEX20_TS_SP_preliminary_v4 + """ + # TODO: write script to clear out the timeseries pits + # * maybe delete all pits and then add them back in + # https://nsidc.org/data/snex21_ts_sp/versions/1 + doi = "https://doi.org/10.5067/QIANJYJGRWOV" + debug = True + + # TODO: new header of + # Pit Comments + # Parameter Codes + + # Point to the downloaded data from + data_dir = abspath('../download/data/SNOWEX/SNEX21_TS_SP.001/') + error_msg = [] + + # Get all the date folders + unique_dt_olders = Path( + data_dir + ).expanduser().absolute().glob("20*.*.*") + for udf in unique_dt_olders: + # get all the csvs in the folder + dt_folder_files = list(udf.glob("*.csv")) + all_file_names = [f.name for f in dt_folder_files] + site_ids = [] + # Get the unique site ids for this date folder + compiled = re.compile( + r'SNEX21_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' + ) + for file_name in all_file_names: + match = compiled.match(file_name) + if match: + code = match.group(1) + site_ids.append(code) + else: + raise RuntimeError(f"No site ID found for {file_name}") + + # Get the unique site ids + site_ids = list(set(site_ids)) + + for site_id in site_ids: + abbrev = site_id[0:2] + tz = [k for k, states in tz_map.items() if abbrev in states][0] + + # Grab all the csvs in the pits folder + filenames = glob.glob(join(str(udf), f'*_{site_id}_*.csv')) + + # Grab all the site details files + sites = glob.glob(join( + str(udf), f'*_{site_id}_*siteDetails*.csv' + )) + + # Grab all the perimeter depths and remove them for now. + perimeter_depths = glob.glob(join( + str(udf), f'*_{site_id}_*perimeterDepths*.csv' + )) + + # all non-gapped filled_density + gap_filled_density = glob.glob(join( + str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' + )) + + # Remove the site details from the total file list to get only the + profiles = list( + set(filenames) - set(sites) - set(perimeter_depths) - + set(gap_filled_density) # remove gap-filled denisty + ) + + # Submit all profiles associated with pit at a time + b = UploadProfileBatch( + filenames=profiles, + debug=debug, doi=doi, + in_timezone=tz) + b.push() + error_msg += b.errors + + # Upload the site details + sd = UploadSiteDetailsBatch(filenames=sites, + debug=debug, + doi=doi, + in_timezone=tz) + sd.push() + error_msg += sd.errors + + # Submit all perimeters as point data + with db_session( + 'localhost/snowex', credentials='credentials.json' + ) as session: + for fp in perimeter_depths: + pcsv = PointDataCSV( + fp, doi=doi, debug=debug, depth_is_metadata=False, + in_timezone=tz + ) + pcsv.submit(session) + + for f, m in error_msg: + print(f) + return len(error_msg) + + +if __name__ == '__main__': + main() From 624c10bf76ff2d77c2114581700b1a6c82268496 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 14:10:51 -0600 Subject: [PATCH 07/30] Issue #18 no perimeter depth files for 2021 TS pits --- scripts/upload/add_time_series_pits_2021.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py index c5356d8..8e2e721 100644 --- a/scripts/upload/add_time_series_pits_2021.py +++ b/scripts/upload/add_time_series_pits_2021.py @@ -72,11 +72,6 @@ def main(): str(udf), f'*_{site_id}_*siteDetails*.csv' )) - # Grab all the perimeter depths and remove them for now. 
- perimeter_depths = glob.glob(join( - str(udf), f'*_{site_id}_*perimeterDepths*.csv' - )) - # all non-gapped filled_density gap_filled_density = glob.glob(join( str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' @@ -84,7 +79,7 @@ def main(): # Remove the site details from the total file list to get only the profiles = list( - set(filenames) - set(sites) - set(perimeter_depths) - + set(filenames) - set(sites) - set(gap_filled_density) # remove gap-filled denisty ) @@ -104,17 +99,6 @@ def main(): sd.push() error_msg += sd.errors - # Submit all perimeters as point data - with db_session( - 'localhost/snowex', credentials='credentials.json' - ) as session: - for fp in perimeter_depths: - pcsv = PointDataCSV( - fp, doi=doi, debug=debug, depth_is_metadata=False, - in_timezone=tz - ) - pcsv.submit(session) - for f, m in error_msg: print(f) return len(error_msg) From 4863e72382097ae2087adab92f324fe5be56246c Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 15:16:44 -0600 Subject: [PATCH 08/30] having issues creating the test database --- .gitignore | 2 ++ scripts/upload/create.py | 10 ++++++---- snowex_db/upload.py | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 49440f6..4febbca 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ scripts/upload/test*.txt .idea/* scripts/download/data/* venv/ + +credentials.json \ No newline at end of file diff --git a/scripts/upload/create.py b/scripts/upload/create.py index 8f737c6..c24442d 100644 --- a/scripts/upload/create.py +++ b/scripts/upload/create.py @@ -23,9 +23,10 @@ def main(overwrite=False, db='snowex', credentials='./credentials.json'): initialize(engine) log.warning('Database cleared!\n') try: - sql = "CREATE USER snow WITH PASSWORD 'hackweek';" - engine.execute(sql) - engine.execute("GRANT USAGE ON SCHEMA public TO snow;") + with engine.connect() as connection: + connection.execute("CREATE USER snow WITH PASSWORD 'hackweek';") + with engine.connect() as connection: + connection.execute("GRANT USAGE ON SCHEMA public TO snow;") except Exception as e: print(e) @@ -33,7 +34,8 @@ def main(overwrite=False, db='snowex', credentials='./credentials.json'): sql = f'GRANT SELECT ON {t} TO snow;' log.info(f'Adding read only permissions for table {t}...') - engine.execute(sql) + with engine.connect() as connection: + connection.execute(sql) else: log.warning('Aborted. 
Database has not been modified.\n') diff --git a/snowex_db/upload.py b/snowex_db/upload.py index ca9467e..a6e376a 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -6,7 +6,6 @@ from subprocess import STDOUT, check_output from pathlib import Path import pandas as pd -import progressbar from geoalchemy2.elements import RasterElement, WKTElement from os.path import basename, exists, join from os import makedirs, remove From a9065bfc03a0c6f5d480e819078e9a3cac772116 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 8 Jul 2024 14:22:21 -0600 Subject: [PATCH 09/30] Modify create script for sqlalchemy>2.0 --- scripts/upload/create.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/upload/create.py b/scripts/upload/create.py index c24442d..0733819 100644 --- a/scripts/upload/create.py +++ b/scripts/upload/create.py @@ -3,6 +3,7 @@ """ from snowexsql.db import get_db, initialize from snowex_db.utilities import get_logger +from sqlalchemy import text as sqltext import argparse @@ -24,18 +25,25 @@ def main(overwrite=False, db='snowex', credentials='./credentials.json'): log.warning('Database cleared!\n') try: with engine.connect() as connection: - connection.execute("CREATE USER snow WITH PASSWORD 'hackweek';") - with engine.connect() as connection: - connection.execute("GRANT USAGE ON SCHEMA public TO snow;") + # Autocommit so the user is created before granting access + connection = connection.execution_options( + isolation_level="AUTOCOMMIT") + connection.execute( + sqltext("CREATE USER snow WITH PASSWORD 'hackweek';") + ) + connection.execute( + sqltext("GRANT USAGE ON SCHEMA public TO snow;") + ) except Exception as e: - print(e) + log.error("Failed on user creation") + raise e for t in ['sites', 'points', 'layers', 'images']: sql = f'GRANT SELECT ON {t} TO snow;' log.info(f'Adding read only permissions for table {t}...') with engine.connect() as connection: - connection.execute(sql) + connection.execute(sqltext(sql)) else: log.warning('Aborted. Database has not been modified.\n') From 1d29427493d5bcfcb73a59f50459aece10dc253b Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 8 Jul 2024 15:43:38 -0600 Subject: [PATCH 10/30] Switch to 2020 V1 pits - there are some data format and header issues in the V2 data --- scripts/upload/add_time_series_pits_2020.py | 42 +++++++++++++-------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index b98e961..ac53824 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -22,11 +22,12 @@ def main(): Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
Folder name is SNEX20_TS_SP_preliminary_v4 """ + db_name = 'localhost/snowex' # TODO: write script to clear out the timeseries pits # * maybe delete all pits and then add them back in - # Version 2 DOI - # https://nsidc.org/data/snex20_ts_sp/versions/2 - doi = "https://doi.org/10.5067/KZ43HVLZV6G4" + # Version 1 DOI + # https://nsidc.org/data/snex20_ts_sp/versions/1 + doi = "https://doi.org/10.5067/POT9E0FFUUD1" debug = True # TODO: new header of @@ -34,9 +35,15 @@ def main(): # Parameter Codes # Point to the downloaded data from - data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') + data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.001/') error_msg = [] + # Files to ignore + ignore_files = [ + "SNEX20_TS_SP_Summary_Environment_v01.csv", + "SNEX20_TS_SP_Summary_SWE_v01.csv" + ] + # Get all the date folders unique_dt_olders = Path( data_dir @@ -44,13 +51,16 @@ def main(): for udf in unique_dt_olders: # get all the csvs in the folder dt_folder_files = list(udf.glob("*.csv")) - all_file_names = [f.name for f in dt_folder_files] site_ids = [] # Get the unique site ids for this date folder compiled = re.compile( - r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv' + r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' ) - for file_name in all_file_names: + for file_path in dt_folder_files: + file_name = file_path.name + if file_name in ignore_files: + print(f"Skipping {file_name}") + continue match = compiled.match(file_name) if match: code = match.group(1) @@ -91,24 +101,24 @@ def main(): # Submit all profiles associated with pit at a time b = UploadProfileBatch( - filenames=profiles, - debug=debug, doi=doi, - in_timezone=tz) + filenames=profiles, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) b.push() error_msg += b.errors # Upload the site details - sd = UploadSiteDetailsBatch(filenames=sites, - debug=debug, - doi=doi, - in_timezone=tz) + sd = UploadSiteDetailsBatch( + filenames=sites, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) sd.push() error_msg += sd.errors # Submit all perimeters as point data with db_session( - 'localhost/snowex', credentials='credentials.json' - ) as session: + db_name, credentials='credentials.json' + ) as (session, engine): for fp in perimeter_depths: pcsv = PointDataCSV( fp, doi=doi, debug=debug, depth_is_metadata=False, From c2c3e0046854739131b5711afd5c2a69ac7b882c Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 8 Jul 2024 15:43:49 -0600 Subject: [PATCH 11/30] Use db_session function --- snowex_db/batch.py | 20 ++++++++++---------- snowex_db/projection.py | 5 +++-- snowex_db/upload.py | 12 ++++++++---- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/snowex_db/batch.py b/snowex_db/batch.py index 864aaca..fc547f0 100644 --- a/snowex_db/batch.py +++ b/snowex_db/batch.py @@ -7,7 +7,7 @@ import time from os.path import abspath, basename, expanduser, join -from snowexsql.db import get_db +from snowex_db import db_session from snowex_db.interpretation import get_InSar_flight_comment from snowex_db.metadata import (DataHeader, SMPMeasurementLog, read_InSar_annotation) @@ -71,10 +71,6 @@ def __init__(self, filenames, **kwargs): self.errors = [] self.uploaded = 0 - # Grab db using credentials - self.log.info('Accessing Database {}'.format(self.db_name)) - engine, self.session = get_db(self.db_name, credentials=self.credentials) - self.log.info('Preparing to upload {} files...'.format(len(filenames))) def push(self): @@ -111,8 +107,6 @@ def push(self): else: self._push_one(f, 
**self.meta) - self.session.close() - # Log the ending errors self.report(i + 1) @@ -127,7 +121,9 @@ def _push_one(self, f, **kwargs): d = self.UploaderClass(f, **kwargs) # Submit the data to the database - d.submit(self.session) + self.log.info('Accessing Database {}'.format(self.db_name)) + with db_session(self.db_name, self.credentials) as (session, engine): + d.submit(session) self.uploaded += 1 def report(self, files_attempted): @@ -151,7 +147,6 @@ def report(self, files_attempted): self.log.info('Finished! Elapsed {:d}s\n'.format( int(time.time() - self.start))) - self.session.close() class UploadSiteDetailsBatch(BatchBase): @@ -325,7 +320,12 @@ def _push_one(self, f, **kwargs): d = self.UploaderClass(r, **meta) # Submit the data to the database - d.submit(self.session) + # Grab db using credentials + self.log.info('Accessing Database {}'.format(self.db_name)) + with db_session( + self.db_name, self.credentials + ) as (session, engine): + d.submit(session) # Uploaded set self.uploaded += 1 diff --git a/snowex_db/projection.py b/snowex_db/projection.py index 75ac76e..256b18e 100644 --- a/snowex_db/projection.py +++ b/snowex_db/projection.py @@ -39,8 +39,9 @@ def reproject_point_in_dict(info, is_northern=True, zone_number=None): easting, northing, utm_zone, letter = utm.from_latlon( result['latitude'], result['longitude'], force_zone_number=zone_number) - result['easting'] = easting - result['northing'] = northing + # String representation should not be np.float64, so cast to float + result['easting'] = float(easting) + result['northing'] = float(northing) result['utm_zone'] = utm_zone # Secondarily use the utm to add lat long diff --git a/snowex_db/upload.py b/snowex_db/upload.py index a6e376a..ee342b7 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -62,10 +62,14 @@ def _read(self, profile_filename): df: pd.dataframe contain csv data with standardized column names """ # header=0 because docs say to if using skip rows and columns - df = pd.read_csv(profile_filename, header=0, - skiprows=self.hdr.header_pos, - names=self.hdr.columns, - encoding='latin') + try: + df = pd.read_csv( + profile_filename, header=0, skiprows=self.hdr.header_pos, + names=self.hdr.columns, encoding='latin' + ) + except pd.errors.ParserError as e: + LOG.error(e) + raise RuntimeError(f"Failed reading {profile_filename}") # Special SMP specific tasks depth_fmt = 'snow_height' From 07864cb66e4698de0d8e2025f533e12f6e94b9e1 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 9 Jul 2024 11:56:13 -0600 Subject: [PATCH 12/30] Slight tweaks to 2021 timeseries script --- scripts/upload/add_time_series_pits_2021.py | 26 +++++++-------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py index 8e2e721..2faaf14 100644 --- a/scripts/upload/add_time_series_pits_2021.py +++ b/scripts/upload/add_time_series_pits_2021.py @@ -8,8 +8,6 @@ from pathlib import Path from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch -from snowex_db.upload import PointDataCSV -from snowex_db import db_session tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], @@ -19,19 +17,13 @@ def main(): """ - Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
- Folder name is SNEX20_TS_SP_preliminary_v4 + Snowex 2021 timeseries pits """ - # TODO: write script to clear out the timeseries pits - # * maybe delete all pits and then add them back in + db_name = 'localhost/snowex' # https://nsidc.org/data/snex21_ts_sp/versions/1 doi = "https://doi.org/10.5067/QIANJYJGRWOV" debug = True - # TODO: new header of - # Pit Comments - # Parameter Codes - # Point to the downloaded data from data_dir = abspath('../download/data/SNOWEX/SNEX21_TS_SP.001/') error_msg = [] @@ -85,17 +77,17 @@ def main(): # Submit all profiles associated with pit at a time b = UploadProfileBatch( - filenames=profiles, - debug=debug, doi=doi, - in_timezone=tz) + filenames=profiles, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) b.push() error_msg += b.errors # Upload the site details - sd = UploadSiteDetailsBatch(filenames=sites, - debug=debug, - doi=doi, - in_timezone=tz) + sd = UploadSiteDetailsBatch( + filenames=sites, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) sd.push() error_msg += sd.errors From 9ec87fb0f95945a43c6598341d9b4aa1e7b220e4 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 10 Jul 2024 12:26:33 -0600 Subject: [PATCH 13/30] Script to delete pits --- scripts/remove_data/remove_pits.py | 68 ++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 scripts/remove_data/remove_pits.py diff --git a/scripts/remove_data/remove_pits.py b/scripts/remove_data/remove_pits.py new file mode 100644 index 0000000..99b6160 --- /dev/null +++ b/scripts/remove_data/remove_pits.py @@ -0,0 +1,68 @@ +""" +File to remove all snowpits from the database +""" +import argparse +from snowexsql.api import db_session +from snowexsql.data import LayerData +from snowexsql.db import get_db + + +def main(): + parser = argparse.ArgumentParser( + description='Script to create our databases using the python library') + parser.add_argument('--db', dest='db', default='snowex', + help='Name of the database locally to add tables to') + parser.add_argument('--dry_run', dest='dry_run', action='store_true', + help='Try a dry run or not') + parser.add_argument('--credentials', dest='credentials', + default='./credentials.json', + help='Past to a json containing') + args = parser.parse_args() + + credentials = args.credentials + db_name = f'localhost/{args.db}' + dry_run = args.dry_run + + # All measurement 'types' associate with pits + types_pit = [ + 'sample_signal', 'grain_size', 'density', 'reflectance', + 'permittivity', 'lwc_vol', 'manual_wetness', + 'equivalent_diameter', 'specific_surface_area', 'grain_type', + 'temperature', 'hand_hardness' + ] + # Start a session + engine, session = get_db(db_name, credentials=credentials) + print(f"Connected to {db_name}") + try: + q = session.query(LayerData).filter( + LayerData.pit_id is not None # Filter to results with pit id + ).filter( + LayerData.type.in_(types_pit) # Filter to correct type + ) + result = q.count() + # Rough count of pits + estimated_number = int(result / float(len(types_pit)) / 10.0) + print(f"Found {result} records") + print(f"This is roughly {estimated_number} pits") + if dry_run: + print("THIS IS A DRYRUN, not deleting") + else: + if result > 0: + print("Deleting pits from the database") + # Delete + q.delete() + session.commit() + else: + print("No results, nothing to delete") + session.close() + except Exception as e: + print("Errored out, rolling back") + print(e) + session.rollback() + raise e + + print("Done") + + +if __name__ == '__main__': + main() From 
90ed14d76e4836f01394b648b79d421f2c339977 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 13:02:44 -0600 Subject: [PATCH 14/30] start using insitupy for metadata handling --- requirements.txt | 3 +- snowex_db/metadata.py | 114 +++++++----------------------------------- 2 files changed, 19 insertions(+), 98 deletions(-) diff --git a/requirements.txt b/requirements.txt index 38900b2..0c5d68e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,10 @@ wheel>0.34.0, <0.35.0 snowexsql>=0.4.1, <0.5.0 snowmicropyn -matplotlib>=3.2.2, <3.3.0 +matplotlib>=3.2.2 moto==3.1.11 coloredlogs>=14.0 progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 +insitupy==0.1.0 diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 09217ec..d2e276c 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -5,10 +5,10 @@ from os.path import basename import pandas as pd - +from insitupy.campaigns.metadata import MetaDataParser from snowexsql.db import get_table_attributes - from snowexsql.data import SiteData + from .interpretation import * from .projection import add_geom, reproject_point_in_dict from .string_management import * @@ -429,78 +429,6 @@ def rename_sample_profiles(self, columns, data_names): result.append(c) return result - def parse_column_names(self, lines): - """ - A flexible mnethod that attempts to find and standardize column names - for csv data. Looks for a comma separated line with N entries == to the - last line in the file. If an entry is found with more commas than the - last line then we use that. This allows us to have data that doesn't - have all the commas in the data (SSA typically missing the comma for - veg unless it was notable) - - Assumptions: - - 1. The last line in file is of representative csv data - - 2. 
The header is the last column that has more chars than numbers - - Args: - lines: Complete list of strings from the file - - Returns: - columns: list of column names - """ - - # Minimum column size should match the last line of data (Assumption - # #2) - n_columns = len(lines[-1].split(',')) - - # Use these to monitor if a larger column count is found - header_pos = 0 - if lines[0][0] == '#': - header_indicator = '#' - else: - header_indicator = None - - for i, l in enumerate(lines): - if i == 0: - previous = get_alpha_ratio(lines[i]) - else: - previous = get_alpha_ratio(lines[i - 1]) - - if line_is_header(l, expected_columns=n_columns, - header_indicator=header_indicator, - previous_alpha_ratio=previous): - header_pos = i - - if i > header_pos: - break - - self.log.debug('Found end of header at line {}...'.format(header_pos)) - - # Parse the columns header based on the size of the last line - str_line = lines[header_pos] - # Remove units - for c in ['()', '[]']: - str_line = strip_encapsulated(str_line, c) - - raw_cols = str_line.strip('#').split(',') - standard_cols = [standardize_key(c) for c in raw_cols] - - # Rename any column names to more standard ones - columns = remap_data_names(standard_cols, self.rename) - - # Determine the profile type - (self.data_names, self.multi_sample_profiles) = \ - self.determine_data_names(columns) - - self.data_names = remap_data_names(self.data_names, self.rename) - - if self.multi_sample_profiles: - columns = self.rename_sample_profiles(columns, self.data_names) - - return columns, header_pos - def determine_data_names(self, raw_columns): """ Determine the names of the data to be uploaded from the raw column @@ -574,33 +502,25 @@ def _read(self, filename): read_csv """ - with open(filename, encoding='latin') as fp: - lines = fp.readlines() - fp.close() - - # Site description files have no need for column lists - if 'site' in filename.lower(): - self.log.info('Parsing site description header...') - columns = None - header_pos = None - - # Site location parses all of the file - - # Find the column names and where it is in the file - else: - columns, header_pos = self.parse_column_names(lines) - self.log.debug('Column Data found to be {} columns based on Line ' - '{}'.format(len(columns), header_pos)) + parser = MetaDataParser( + filename, timezone=self.in_timezone, + header_sep=self.header_sep + ) + str_data, columns, header_pos = parser.find_header_info() + # Determine the profile type + (self.data_names, self.multi_sample_profiles) = \ + self.determine_data_names(columns) - # Only parse what we know if the header - lines = lines[0:header_pos] + self.data_names = remap_data_names(self.data_names, self.rename) - # Clean up the lines from line returns to grab header info - lines = [ln.strip() for ln in lines] - str_data = " ".join(lines).split('#') + if self.multi_sample_profiles: + columns = self.rename_sample_profiles(columns, self.data_names) + self.log.debug('Column Data found to be {} columns based on Line ' + '{}'.format(len(columns), header_pos)) # Keep track of the number of lines with # in it for data opening - self.length = len(str_data) + # TODO: what do we do here? + # self.length = len(str_data) # Key value pairs are separate by some separator provided. 
data = {} From f949f72e86438bd9818abf777bfe4ae3288a097a Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 14:26:48 -0600 Subject: [PATCH 15/30] working through handling metadata --- snowex_db/interpretation.py | 29 +++++++++++++++++++++-------- snowex_db/metadata.py | 37 +++++++++++++++++++++---------------- 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/snowex_db/interpretation.py b/snowex_db/interpretation.py index 34928c1..d8de691 100644 --- a/snowex_db/interpretation.py +++ b/snowex_db/interpretation.py @@ -100,6 +100,14 @@ def manage_aspect(info): return info +def is_number(s): + try: + float(s) # Try to convert the string to a float + return True + except ValueError: + return False + + def convert_cardinal_to_degree(cardinal): """ Converts cardinal directions to degrees. Also removes any / or - that @@ -136,16 +144,21 @@ def convert_cardinal_to_degree(cardinal): # Manage extra characters separating composite dirs, make it all upper case d = ''.join([c.upper() for c in cardinal if c not in '/-']) - # Assume West, East, South, Or North - if len(d) > 3: - d = d[0] - warnings.warn("Assuming {} is {}".format(cardinal, d)) + # Go straight to degrees if numeric + if is_number(d): + degrees = float(d) - if d in dirs: - i = dirs.index(d) - degrees = i * (360. / len(dirs)) else: - raise ValueError('Invalid cardinal direction {}!'.format(cardinal)) + # Assume West, East, South, Or North + if len(d) > 3: + d = d[0] + warnings.warn("Assuming {} is {}".format(cardinal, d)) + + if d in dirs: + i = dirs.index(d) + degrees = i * (360. / len(dirs)) + else: + raise ValueError('Invalid cardinal direction {}!'.format(cardinal)) return degrees diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index d2e276c..387eece 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -5,7 +5,7 @@ from os.path import basename import pandas as pd -from insitupy.campaigns.metadata import MetaDataParser +from insitupy.campaigns.campaign import SnowExMetadataParser from snowexsql.db import get_table_attributes from snowexsql.data import SiteData @@ -350,7 +350,9 @@ class DataHeader(object): 'epsg': None, 'header_sep': ',', 'northern_hemisphere': True, - 'depth_is_metadata': True} + 'depth_is_metadata': True, + 'allow_split_lines': False + } def __init__(self, filename, **kwargs): """ @@ -502,25 +504,28 @@ def _read(self, filename): read_csv """ - parser = MetaDataParser( + parser = SnowExMetadataParser( filename, timezone=self.in_timezone, - header_sep=self.header_sep + header_sep=self.header_sep, + allow_split_lines=self.allow_split_lines ) - str_data, columns, header_pos = parser.find_header_info() - # Determine the profile type - (self.data_names, self.multi_sample_profiles) = \ - self.determine_data_names(columns) + str_data, standard_cols, header_pos = parser.find_header_info() - self.data_names = remap_data_names(self.data_names, self.rename) + if standard_cols is not None: + # handle name remapping + columns = remap_data_names(standard_cols, self.rename) + # Determine the profile type + (self.data_names, self.multi_sample_profiles) = \ + self.determine_data_names(columns) - if self.multi_sample_profiles: - columns = self.rename_sample_profiles(columns, self.data_names) - self.log.debug('Column Data found to be {} columns based on Line ' - '{}'.format(len(columns), header_pos)) + self.data_names = remap_data_names(self.data_names, self.rename) - # Keep track of the number of lines with # in it for data opening - # TODO: what do we do here? 
- # self.length = len(str_data) + if self.multi_sample_profiles: + columns = self.rename_sample_profiles(columns, self.data_names) + self.log.debug('Column Data found to be {} columns based on Line ' + '{}'.format(len(columns), header_pos)) + else: + columns = standard_cols # Key value pairs are separate by some separator provided. data = {} From a11c8413cafc5f25ef86cc045c64ed0710f98889 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 14:29:59 -0600 Subject: [PATCH 16/30] 2020 V2 data, allow split header line logic. ALSO - use the non-gap-filled density because the gap filled density files break the logic as they don't show the profile at all --- scripts/upload/add_time_series_pits_2020.py | 35 +++++++++++---------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index ac53824..a23bb40 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -25,23 +25,19 @@ def main(): db_name = 'localhost/snowex' # TODO: write script to clear out the timeseries pits # * maybe delete all pits and then add them back in - # Version 1 DOI - # https://nsidc.org/data/snex20_ts_sp/versions/1 - doi = "https://doi.org/10.5067/POT9E0FFUUD1" + # Version 2 DOI + # https://nsidc.org/data/snex20_ts_sp/versions/2 + doi = "https://doi.org/10.5067/KZ43HVLZV6G4" debug = True - # TODO: new header of - # Pit Comments - # Parameter Codes - # Point to the downloaded data from - data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.001/') + data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') error_msg = [] # Files to ignore ignore_files = [ - "SNEX20_TS_SP_Summary_Environment_v01.csv", - "SNEX20_TS_SP_Summary_SWE_v01.csv" + "SNEX20_TS_SP_Summary_Environment_v02.csv", + "SNEX20_TS_SP_Summary_SWE_v02.csv" ] # Get all the date folders @@ -54,7 +50,7 @@ def main(): site_ids = [] # Get the unique site ids for this date folder compiled = re.compile( - r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' + r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv' ) for file_path in dt_folder_files: file_name = file_path.name @@ -88,21 +84,25 @@ def main(): str(udf), f'*_{site_id}_*perimeterDepths*.csv' )) - # all non-gapped filled_density - gap_filled_density = glob.glob(join( + # Use no-gap-filled density for the sole reason that + # Gap filled density for profiles where the scale was broken + # are just an empty file after the headers. 
We should + # Record that Nan density was collected for the profile + density_files = glob.glob(join( str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' )) # Remove the site details from the total file list to get only the profiles = list( set(filenames) - set(sites) - set(perimeter_depths) - - set(gap_filled_density) # remove gap-filled denisty + set(density_files) # remove non-gap-filled denisty ) # Submit all profiles associated with pit at a time b = UploadProfileBatch( filenames=profiles, debug=debug, doi=doi, in_timezone=tz, - db_name=db_name + db_name=db_name, + allow_split_lines=True # Logic for split header lines ) b.push() error_msg += b.errors @@ -115,6 +115,8 @@ def main(): sd.push() error_msg += sd.errors + # TODO: upload SWE file like the perimiter depths + # Submit all perimeters as point data with db_session( db_name, credentials='credentials.json' @@ -122,7 +124,8 @@ def main(): for fp in perimeter_depths: pcsv = PointDataCSV( fp, doi=doi, debug=debug, depth_is_metadata=False, - in_timezone=tz + in_timezone=tz, + allow_split_lines=True # Logic for split header lines ) pcsv.submit(session) From 90a20a5832c5a219a2e2b2b01bffa14223cf327d Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 15:07:17 -0600 Subject: [PATCH 17/30] get rid of spaces in flags --- snowex_db/upload.py | 49 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/snowex_db/upload.py b/snowex_db/upload.py index ee342b7..ac97a08 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -25,6 +25,10 @@ LOG = logging.getLogger("snowex_db.upload") +class DataValidationError(ValueError): + pass + + class UploadProfileData: """ Class for submitting a single profile. Since layers are uploaded layer by layer this allows for submitting them @@ -50,6 +54,36 @@ def __init__(self, profile_filename, **kwargs): # Use the files creation date as the date accessed for NSIDC citation self.date_accessed = get_file_creation_date(self.filename) + def _handle_force(self, df, profile_filename): + if 'force' in df.columns: + # Convert depth from mm to cm + df['depth'] = df['depth'].div(10) + is_smp = True + # Make the data negative from snow surface + depth_fmt = 'surface_datum' + + # SMP serial number and original filename for provenance to the comment + f = basename(profile_filename) + serial_no = f.split('SMP_')[-1][1:3] + + df['comments'] = f"fname = {f}, " \ + f"serial no. = {serial_no}" + + return df + + def _handle_flags(self, df): + + if "flags" in df.columns: + # Max length of the flags column + max_len = LayerData.flags.type.length + df["flags"] = df["flags"].str.replace(" ", "") + str_len = df["flags"].str.len() + if any(str_len > max_len): + raise DataValidationError( + f"Flag column is too long" + ) + return df + def _read(self, profile_filename): """ Read in a profile file. Managing the number of lines to skip and @@ -74,19 +108,8 @@ def _read(self, profile_filename): # Special SMP specific tasks depth_fmt = 'snow_height' is_smp = False - if 'force' in df.columns: - # Convert depth from mm to cm - df['depth'] = df['depth'].div(10) - is_smp = True - # Make the data negative from snow surface - depth_fmt = 'surface_datum' - - # SMP serial number and original filename for provenance to the comment - f = basename(profile_filename) - serial_no = f.split('SMP_')[-1][1:3] - df['comments'] = f"fname = {f}, " \ - f"serial no. 
= {serial_no}" + df = self._handle_force(df, profile_filename) if not df.empty: # Standardize all depth data @@ -183,6 +206,8 @@ def build_data(self, data_name): df['comments'] = df['comments'].apply( lambda x: x.strip(' ') if isinstance(x, str) else x) + self._handle_flags(df) + return df def submit(self, session): From 94ddad031508158e604d6e7f85c6ebaf107ea824 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 24 Jul 2024 13:56:35 -0600 Subject: [PATCH 18/30] Script for 2021 pits is working --- scripts/upload/add_time_series_pits_2021.py | 29 ++++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py index 2faaf14..132ded6 100644 --- a/scripts/upload/add_time_series_pits_2021.py +++ b/scripts/upload/add_time_series_pits_2021.py @@ -28,6 +28,12 @@ def main(): data_dir = abspath('../download/data/SNOWEX/SNEX21_TS_SP.001/') error_msg = [] + # Files to ignore + ignore_files = [ + "SNEX21_TS_SP_Summary_Environment_v01.csv", + "SNEX21_TS_SP_Summary_SWE_v01.csv" + ] + # Get all the date folders unique_dt_olders = Path( data_dir @@ -35,13 +41,17 @@ def main(): for udf in unique_dt_olders: # get all the csvs in the folder dt_folder_files = list(udf.glob("*.csv")) - all_file_names = [f.name for f in dt_folder_files] site_ids = [] # Get the unique site ids for this date folder compiled = re.compile( r'SNEX21_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' ) - for file_name in all_file_names: + for file_path in dt_folder_files: + file_name = file_path.name + if file_name in ignore_files: + print(f"Skipping {file_name}") + continue + match = compiled.match(file_name) if match: code = match.group(1) @@ -64,21 +74,25 @@ def main(): str(udf), f'*_{site_id}_*siteDetails*.csv' )) - # all non-gapped filled_density - gap_filled_density = glob.glob(join( + # Use no-gap-filled density for the sole reason that + # Gap filled density for profiles where the scale was broken + # are just an empty file after the headers. 
We should + # Record that Nan density was collected for the profile + density_files = glob.glob(join( str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' )) # Remove the site details from the total file list to get only the profiles = list( set(filenames) - set(sites) - - set(gap_filled_density) # remove gap-filled denisty + set(density_files) # remove non-gap-filled denisty ) # Submit all profiles associated with pit at a time b = UploadProfileBatch( filenames=profiles, debug=debug, doi=doi, in_timezone=tz, - db_name=db_name + db_name=db_name, + allow_split_lines=True # Logic for split header lines ) b.push() error_msg += b.errors @@ -86,7 +100,8 @@ def main(): # Upload the site details sd = UploadSiteDetailsBatch( filenames=sites, debug=debug, doi=doi, in_timezone=tz, - db_name=db_name + db_name=db_name, + allow_split_lines=True # Logic for split header lines ) sd.push() error_msg += sd.errors From dd1547fee5f6bd3f80b416c3cc3adb59eaa3cee4 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 24 Jul 2024 16:05:54 -0600 Subject: [PATCH 19/30] start working on SWE files for pits --- scripts/upload/add_pits_bulk_properties.py | 67 +++++++++++++++++++++ scripts/upload/add_time_series_pits_2020.py | 3 +- 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 scripts/upload/add_pits_bulk_properties.py diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_pits_bulk_properties.py new file mode 100644 index 0000000..c261427 --- /dev/null +++ b/scripts/upload/add_pits_bulk_properties.py @@ -0,0 +1,67 @@ +""" +Script to upload the Snowex Time Series pits +""" + +import glob +import re +from os.path import abspath, join +from pathlib import Path + +import pandas as pd + +from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch +from snowex_db.upload import PointDataCSV +from snowex_db import db_session + + +tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], + 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], + } + + +def main(): + """ + Add bulk SWE, Depth, Density for 2020 and 2021 timeseires pits + """ + db_name = 'localhost/test' + debug = True + + # Point to the downloaded data from + data_dir = abspath('../download/data/SNOWEX/') + error_msg = [] + + path_details = [ + { + "DOI": "https://doi.org/10.5067/KZ43HVLZV6G4", + "path": "SNEX20_TS_SP.002/2019.10.24/SNEX20_TS_SP_Summary_SWE_v02.csv" + }, + { + "DOI": "https://doi.org/10.5067/QIANJYJGRWOV", + "path": "SNEX21_TS_SP.001/2020.11.16/SNEX21_TS_SP_Summary_SWE_v01.csv" + } + ] + for info in path_details: + doi = info["DOI"] + file_path = join(data_dir, info["path"]) + # Read csv and dump new one without the extra header lines + df = pd.read_csv( + file_path, + skiprows=list(range(32)) + [33] + ) + new_name = file_path.replace(".csv", "_modified.csv") + df.to_csv(new_name, index=False) + + # Submit SWE file data as point data + with db_session( + db_name, credentials='credentials.json' + ) as (session, engine): + # TODO: tz based on points + pcsv = PointDataCSV( + new_name, doi=doi, debug=debug, depth_is_metadata=False, + in_timezone="US/Mountain", + ) + pcsv.submit(session) + + +if __name__ == '__main__': + main() diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index a23bb40..edb13b6 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -19,8 +19,7 @@ def main(): """ - Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
- Folder name is SNEX20_TS_SP_preliminary_v4 + Add 2020 timeseries pits """ db_name = 'localhost/snowex' # TODO: write script to clear out the timeseries pits From afaaa5b4b480fadcf47ca0fd56af4f25a6c6166d Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Thu, 25 Jul 2024 11:44:49 -0600 Subject: [PATCH 20/30] move towards row based SRID and timezone ability --- scripts/upload/add_pits_bulk_properties.py | 16 +++++- snowex_db/interpretation.py | 1 + snowex_db/metadata.py | 19 +++++-- snowex_db/upload.py | 58 +++++++++++++++++----- 4 files changed, 75 insertions(+), 19 deletions(-) diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_pits_bulk_properties.py index c261427..4a5c6bd 100644 --- a/scripts/upload/add_pits_bulk_properties.py +++ b/scripts/upload/add_pits_bulk_properties.py @@ -49,6 +49,16 @@ def main(): skiprows=list(range(32)) + [33] ) new_name = file_path.replace(".csv", "_modified.csv") + # Filter to columns we want (density, swe, etc) + columns = [ + 'Location', 'Site', 'PitID', 'Date/Local Standard Time', 'UTM Zone', + 'Easting (m)', 'Northing (m)', 'Latitude (deg)', 'Longitude (deg)', + # 'Density A Mean (kg/m^3)', 'Density B Mean (kg/m^3)', + 'Density Mean (kg/m^3)', + # 'SWE A (mm)', 'SWE B (mm)', + 'SWE (mm)', 'HS (cm)', 'Flag' + ] + df = df.loc[:, columns] df.to_csv(new_name, index=False) # Submit SWE file data as point data @@ -57,8 +67,10 @@ def main(): ) as (session, engine): # TODO: tz based on points pcsv = PointDataCSV( - new_name, doi=doi, debug=debug, depth_is_metadata=False, - in_timezone="US/Mountain", + new_name, doi=doi, debug=debug, + depth_is_metadata=False, + row_based_crs=True, + row_based_timezone=True ) pcsv.submit(session) diff --git a/snowex_db/interpretation.py b/snowex_db/interpretation.py index d8de691..743d043 100644 --- a/snowex_db/interpretation.py +++ b/snowex_db/interpretation.py @@ -216,6 +216,7 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'): raise ValueError("We did not recieve a valid in_timezone") # Look for a single header entry containing date and time. 
+ # This would handle key of 'datetime' for k in data.keys(): kl = k.lower() if 'date' in kl and 'time' in kl: diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 387eece..17f9e88 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -326,6 +326,7 @@ class DataHeader(object): 'measurement_tool': 'instrument', 'avgdensity': 'density', 'avg_density': 'density', + 'density_mean': 'density', 'dielectric_constant': 'permittivity', 'flag': 'flags', 'hs': 'depth', @@ -374,12 +375,20 @@ def __init__(self, filename, **kwargs): self.extra_header = assign_default_kwargs( self, kwargs, self.defaults, leave=['epsg']) - # Validate that an intentionally good in timezone was given - in_timezone = kwargs.get('in_timezone') - if in_timezone is None or "local" in in_timezone.lower(): - raise ValueError("A valid in_timezone was not provided") + # Use a row based timezone + if kwargs.get("row_based_timezone", False): + if kwargs.get('in_timezone'): + raise ValueError( + "Cannot have row based and file based timezone" + ) + self.in_timezone = None else: - self.in_timezone = in_timezone + # Validate that an intentionally good in timezone was given + in_timezone = kwargs.get('in_timezone') + if in_timezone is None or "local" in in_timezone.lower(): + raise ValueError("A valid in_timezone was not provided") + else: + self.in_timezone = in_timezone self.log.info('Interpreting metadata in {}'.format(filename)) diff --git a/snowex_db/upload.py b/snowex_db/upload.py index ac97a08..3f7b75a 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -11,6 +11,7 @@ from os import makedirs, remove import boto3 import logging +from timezonefinder import TimezoneFinder from snowexsql.db import get_table_attributes from snowexsql.data import ImageData, LayerData, PointData @@ -271,11 +272,22 @@ def __init__(self, filename, **kwargs): # Assign defaults for this class self.kwargs = assign_default_kwargs(self, kwargs, self.defaults) + # Assign if details are row based (generally for the SWE files) + self._row_based_crs = self.kwargs.get("row_based_crs", False) + self._row_based_tz = self.kwargs.get("row_based_timezone", False) + if self._row_based_tz: + in_timezone = None + else: + in_timezone = kwargs['in_timezone'] + # Use the files creation date as the date accessed for NSIDC citation self.date_accessed = get_file_creation_date(filename) # NOTE: This will error if in_timezone is not provided - self.hdr = DataHeader(filename, in_timezone=kwargs['in_timezone'], **self.kwargs) + self.hdr = DataHeader( + filename, in_timezone=in_timezone, + **self.kwargs + ) self.df = self._read(filename) # Performance tracking @@ -307,9 +319,21 @@ def _read(self, filename): df['date'] = self.hdr.info['date'] df['time'] = self.hdr.info['time'] else: - # date/time was provided in the data - df = df.apply(lambda data: add_date_time_keys( - data, in_timezone=self.in_timezone), axis=1) + # date/time was provided in the + if self._row_based_tz: + # row based in timezone + df = df.apply( + lambda data: add_date_time_keys( + data, + in_timezone=TimezoneFinder().timezone_at( + lng=data['longitude'], lat=data['latitude'] + ) + ), axis=1 + ) + else: + # file based timezone + df = df.apply(lambda data: add_date_time_keys( + data, in_timezone=self.in_timezone), axis=1) # 1. 
Only submit valid columns to the DB self.log.info('Adding valid keyword arguments to metadata...') @@ -327,22 +351,33 @@ def _read(self, filename): df[k] = self.hdr.info[k] # Add geometry - df['geom'] = df.apply(lambda row: WKTElement( - 'POINT({} {})'.format( - row['easting'], - row['northing']), + if self._row_based_crs: + # EPSG at row level here (EPSG:269...) + df['geom'] = df.apply(lambda row: WKTElement( + 'POINT({} {})'.format( + row['easting'], + row['northing']), + srid=f"EPSG:269{row['utm_zone']}"), axis=1) + else: + # EPSG at the file level + df['geom'] = df.apply(lambda row: WKTElement( + 'POINT({} {})'.format( + row['easting'], + row['northing']), srid=self.hdr.info['epsg']), axis=1) - # 2. Add all kwargs that were valid for v in valid: if v in self.kwargs.keys(): df[v] = self.kwargs[v] - # Add a camera id to the description if camera is in the cols (For camera derived snow depths) + # Add a camera id to the description if camera is in the cols + # (For camera derived snow depths) if 'camera' in df.columns: self.log.info('Adding camera id to equipment column...') - df['equipment'] = df.apply(lambda row: f'camera id = {row["camera"]}', axis=1) + df['equipment'] = df.apply( + lambda row: f'camera id = {row["camera"]}', axis=1 + ) # 3. Remove columns that are not valid drops = \ @@ -384,7 +419,6 @@ def submit(self, session): df = self.build_data(pt) self.log.info('Submitting {:,} points of {} to the database...'.format( len(df.index), pt)) - for i, row in df.iterrows(): d = PointData(**row) objects.append(d) From 4376a41bd96fe89430d5d02b8e4436b8ba649613 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Thu, 25 Jul 2024 14:44:15 -0600 Subject: [PATCH 21/30] bulk swe property upload script working --- requirements.txt | 1 + scripts/upload/add_pits_bulk_properties.py | 11 +---------- snowex_db/upload.py | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0c5d68e..29e0d70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ coloredlogs>=14.0 progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 +timezonefinder>=6.0,<7.0 insitupy==0.1.0 diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_pits_bulk_properties.py index 4a5c6bd..f2c6290 100644 --- a/scripts/upload/add_pits_bulk_properties.py +++ b/scripts/upload/add_pits_bulk_properties.py @@ -9,21 +9,15 @@ import pandas as pd -from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch from snowex_db.upload import PointDataCSV from snowex_db import db_session -tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], - 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], - } - - def main(): """ Add bulk SWE, Depth, Density for 2020 and 2021 timeseires pits """ - db_name = 'localhost/test' + db_name = 'localhost/snowex' debug = True # Point to the downloaded data from @@ -53,9 +47,7 @@ def main(): columns = [ 'Location', 'Site', 'PitID', 'Date/Local Standard Time', 'UTM Zone', 'Easting (m)', 'Northing (m)', 'Latitude (deg)', 'Longitude (deg)', - # 'Density A Mean (kg/m^3)', 'Density B Mean (kg/m^3)', 'Density Mean (kg/m^3)', - # 'SWE A (mm)', 'SWE B (mm)', 'SWE (mm)', 'HS (cm)', 'Flag' ] df = df.loc[:, columns] @@ -65,7 +57,6 @@ def main(): with db_session( db_name, credentials='credentials.json' ) as (session, engine): - # TODO: tz based on points pcsv = PointDataCSV( new_name, doi=doi, debug=debug, depth_is_metadata=False, diff --git a/snowex_db/upload.py b/snowex_db/upload.py index 3f7b75a..43097c0 100644 --- a/snowex_db/upload.py +++ 
b/snowex_db/upload.py @@ -357,7 +357,7 @@ def _read(self, filename): 'POINT({} {})'.format( row['easting'], row['northing']), - srid=f"EPSG:269{row['utm_zone']}"), axis=1) + srid=int(row['epsg'])), axis=1) else: # EPSG at the file level df['geom'] = df.apply(lambda row: WKTElement( From 295493ae626a7714e029c150fdc0f3af0aff9946 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 29 Jul 2024 17:05:17 -0600 Subject: [PATCH 22/30] start script to add in met timeseries data --- scripts/download/nsidc_sources.txt | 1 + scripts/upload/add_met_timeseries.py | 117 ++++++++++++++++++ ...=> add_timeseries_pits_bulk_properties.py} | 0 snowex_db/metadata.py | 14 ++- 4 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 scripts/upload/add_met_timeseries.py rename scripts/upload/{add_pits_bulk_properties.py => add_timeseries_pits_bulk_properties.py} (100%) diff --git a/scripts/download/nsidc_sources.txt b/scripts/download/nsidc_sources.txt index 43639ea..3b67c5b 100644 --- a/scripts/download/nsidc_sources.txt +++ b/scripts/download/nsidc_sources.txt @@ -8,3 +8,4 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_G https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD_TLI.001/2019.09.29/SNEX20_SD_TLI_clean.csv https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_TS_SP.002/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX21_TS_SP.001/ +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX_Met.001/ diff --git a/scripts/upload/add_met_timeseries.py b/scripts/upload/add_met_timeseries.py new file mode 100644 index 0000000..1b19e29 --- /dev/null +++ b/scripts/upload/add_met_timeseries.py @@ -0,0 +1,117 @@ +""" +Uploads SnowEx temporary met stations to the database + +Source: https://nsidc.org/data/snex_met/versions/1 +User guide: https://nsidc.org/sites/default/files/documents/user-guide/snex_met-v001-userguide.pdf + +1. Data must be downloaded via sh ../download/download_nsidc.sh +2A. python run.py # To run all together all at once +2B. 
python add_met_timeseries.py # To run individually +""" + +import glob +import time +from os.path import abspath, join + +import pandas as pd +from snowexsql.db import get_db +from snowex_db.upload import * +from snowex_db import db_session + + +def main(): + # Site name + start = time.time() + site_name = 'Grand Mesa' + timezone = 'MST' + + # Read in the Grand Mesa Snow Depths Data + base = abspath(join('../download/data/SNOWEX/SNEX_Met.001/')) + + # Start the Database + db_name = 'localhost/test' + + csvs = glob.glob(join(base, '*/*.csv')) + + # Location mapping from the user guide + location_mapping = { + "GMSP": [39.05084, -108.06144], + "LSOS": [39.05225, -108.09792], + "ME": [39.10358, -107.88383], + "MM": [39.03954, -107.94174], + "MW": [39.03388, -108.21399], + } + + variable_unit_map = { + "RH_10ft": "percent", + "RH_20ft": "percent", + "BP_kPa_Avg": "kPa", + "AirTC_20ft_Avg": "degrees Celcius", + "AirTC_10ft_Avg": "degrees Celcius", + "WSms_20ft_Avg": "m/s", + "WSms_10ft_Avg": "m/s", + "WindDir_10ft_D1_WVT": "degrees", + "WindDir_20ft_D1_WVT": "degrees", + "SUp_Avg": "W/m^2", + "SDn_Avg": "W/m^2", + "LUpCo_Avg": "W/m^2", + "LDnCo_Avg": "W/m^2", + "SM_5cm_Avg": None, + "SM_20cm_Avg": None, + "SM_50cm_Avg": None, + "TC_5cm_Avg": "degrees Celcius", + "TC_20cm_Avg": "degrees Celcius", + "TC_50cm_Avg": "degrees Celcius", + # "DistanceSensToGnd(m)", + "SnowDepthFilter(m)": "m" + } + + errors = 0 + with db_session( + db_name, credentials='credentials.json' + ) as (session, engine): + + for f in csvs: + # find the point relative to the file + point_id = f.split("Met_")[-1].split("_final")[0] + # get location info from the point id + lat, lon = location_mapping[point_id] + + # Read in the file + df = pd.read_csv(f) + # add location info + df["latitude"] = [lat] * len(df) + df["longitude"] = [lon] * len(df) + df = df.set_index("TIMESTAMP") + # TODO: what do we do with site_id? is MM the site id? + # we can add it as "site" to the df if it is + df["site"] = [point_id] * len(df) + + # TODO: how do we handle to different heights? + # use layer data? 
+ + # Split variables into their own files + for v, unit in variable_unit_map.items(): + df_cut = df.loc[ + :, [v, "latitude", "longitude", "site"] + ] + + new_f = f.replace(".csv", f"local_mod_{v}.csv") + df_cut.to_csv(new_f, index_label="datetime") + csv = PointDataCSV( + new_f, + depth_is_metadata=False, + units=unit, + site_name=site_name, + in_timezone=timezone, + epsg=26912, + doi="https://doi.org/10.5067/497NQVJ0CBEX") + + csv.submit(session) + errors += len(csv.errors) + + return errors + + +if __name__ == '__main__': + main() diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_timeseries_pits_bulk_properties.py similarity index 100% rename from scripts/upload/add_pits_bulk_properties.py rename to scripts/upload/add_timeseries_pits_bulk_properties.py diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 17f9e88..a1ea4d0 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -338,11 +338,13 @@ class DataHeader(object): } # Known possible profile types anything not in here will throw an error - available_data_names = ['density', 'permittivity', 'lwc_vol', 'temperature', - 'force', 'reflectance', 'sample_signal', - 'specific_surface_area', 'equivalent_diameter', - 'grain_size', 'hand_hardness', 'grain_type', - 'manual_wetness', 'two_way_travel', 'depth', 'swe'] + available_data_names = [ + 'density', 'permittivity', 'lwc_vol', 'temperature', + 'force', 'reflectance', 'sample_signal', + 'specific_surface_area', 'equivalent_diameter', + 'grain_size', 'hand_hardness', 'grain_type', + 'manual_wetness', 'two_way_travel', 'depth', 'swe', + ] # Defaults to keywords arguments defaults = { @@ -406,7 +408,7 @@ def __init__(self, filename, **kwargs): def submit(self, session): """ - Submit meta data to the database as site info, Do not use on profile + Submit metadata to the database as site info, Do not use on profile headers. Only use on site_details files. 
Args: From 55b6326f767894acb793b3e274191238e1301a0e Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 30 Jul 2024 15:47:34 -0600 Subject: [PATCH 23/30] Script working to upload met data for GM to the database --- requirements.txt | 1 + scripts/upload/add_met_timeseries.py | 127 +++++++++++++++++---------- snowex_db/metadata.py | 18 +++- 3 files changed, 99 insertions(+), 47 deletions(-) diff --git a/requirements.txt b/requirements.txt index 29e0d70..8238ad8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ rasterio>=1.1.5 boto3>=1.23.7,<1.24 timezonefinder>=6.0,<7.0 insitupy==0.1.0 +metloom==0.6.1 diff --git a/scripts/upload/add_met_timeseries.py b/scripts/upload/add_met_timeseries.py index 1b19e29..b07e1cc 100644 --- a/scripts/upload/add_met_timeseries.py +++ b/scripts/upload/add_met_timeseries.py @@ -12,6 +12,7 @@ import glob import time from os.path import abspath, join +from metloom.pointdata.snowex import SnowExMetInfo import pandas as pd from snowexsql.db import get_db @@ -31,39 +32,77 @@ def main(): # Start the Database db_name = 'localhost/test' - csvs = glob.glob(join(base, '*/*.csv')) - - # Location mapping from the user guide - location_mapping = { - "GMSP": [39.05084, -108.06144], - "LSOS": [39.05225, -108.09792], - "ME": [39.10358, -107.88383], - "MM": [39.03954, -107.94174], - "MW": [39.03388, -108.21399], - } - + # Variables we will use variable_unit_map = { - "RH_10ft": "percent", - "RH_20ft": "percent", - "BP_kPa_Avg": "kPa", - "AirTC_20ft_Avg": "degrees Celcius", - "AirTC_10ft_Avg": "degrees Celcius", - "WSms_20ft_Avg": "m/s", - "WSms_10ft_Avg": "m/s", - "WindDir_10ft_D1_WVT": "degrees", - "WindDir_20ft_D1_WVT": "degrees", - "SUp_Avg": "W/m^2", - "SDn_Avg": "W/m^2", - "LUpCo_Avg": "W/m^2", - "LDnCo_Avg": "W/m^2", - "SM_5cm_Avg": None, - "SM_20cm_Avg": None, - "SM_50cm_Avg": None, - "TC_5cm_Avg": "degrees Celcius", - "TC_20cm_Avg": "degrees Celcius", - "TC_50cm_Avg": "degrees Celcius", + "RH_10ft": { + "units": "percent", + "notes": "Relative humidity measured at 10 ft tower level", + "instrument": "Campbell Scientific HC2S3" + }, + # "RH_20ft": "percent", + "BP_kPa_Avg": { + "units": "kPa", + "notes": "Barometric pressure", + "instrument": "Campbell Scientific CS106", + }, + # "AirTC_20ft_Avg": "degrees Celcius", + "AirTC_10ft_Avg": { + "units": "degrees Celcius", + "notes": "Air temperature measured at 10 ft tower level", + "instrument": "Campbell Scientific HC2S3" + }, + # "WSms_20ft_Avg": "m/s", + "WSms_10ft_Avg": { + "units": "m/s", + "notes": "Vector mean wind speed measured at 10 ft tower level", + "instrument": "R.M. Young 05103", + }, + "WindDir_10ft_D1_WVT": { + "units": "degrees", + "notes": "Vector mean wind direction measured at 10 ft tower level", + "instrument": "R.M. 
Young 05103", + }, + # "WindDir_20ft_D1_WVT": "degrees", + "SUp_Avg": { + "units": "W/m^2", + "notes": "Shortwave radiation measured with upward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + "SDn_Avg": { + "units": "W/m^2", + "notes": "Shortwave radiation measured with downward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + "LUpCo_Avg": { + "units": "W/m^2", + "notes": "Longwave radiation measured with upward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + "LDnCo_Avg": { + "units": "W/m^2", + "notes": "Longwave radiation measured with downward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + # "SM_5cm_Avg": None, + "SM_20cm_Avg": { + "units": None, + "notes": "Soil moisture measured at 10 cm below the soil", + "instrument": "Stevens Water Hydraprobe II", + }, + # "SM_50cm_Avg": None, + # "TC_5cm_Avg": "degrees Celcius", + "TC_20cm_Avg": { + "units": "degrees Celcius", + "notes": "Soil temperature measured at 10 cm below the soil", + "instrument": "Stevens Water Hydraprobe II", + }, + # "TC_50cm_Avg": "degrees Celcius", # "DistanceSensToGnd(m)", - "SnowDepthFilter(m)": "m" + "SnowDepthFilter(m)": { + "units": "m", + "notes": "Temperature corrected, derived snow surface height (filtered)", + "instrument": "Campbell Scientific SR50A", + }, } errors = 0 @@ -71,30 +110,26 @@ def main(): db_name, credentials='credentials.json' ) as (session, engine): - for f in csvs: - # find the point relative to the file - point_id = f.split("Met_")[-1].split("_final")[0] - # get location info from the point id - lat, lon = location_mapping[point_id] - + for stn_obj in SnowExMetInfo: + f = join(base, stn_obj.path) # Read in the file df = pd.read_csv(f) # add location info - df["latitude"] = [lat] * len(df) - df["longitude"] = [lon] * len(df) + df["latitude"] = [stn_obj.latitude] * len(df) + df["longitude"] = [stn_obj.longitude] * len(df) df = df.set_index("TIMESTAMP") - # TODO: what do we do with site_id? is MM the site id? - # we can add it as "site" to the df if it is - df["site"] = [point_id] * len(df) - - # TODO: how do we handle to different heights? - # use layer data? + # SITE ID - use station id + df["site"] = [stn_obj.station_id] * len(df) + df["observer"] = ["P. 
Houser"] * len(df) # Split variables into their own files - for v, unit in variable_unit_map.items(): + for v, info in variable_unit_map.items(): + unit = info["units"] + df_cut = df.loc[ :, [v, "latitude", "longitude", "site"] ] + df_cut["instrument"] = [info["instrument"]] * len(df_cut) new_f = f.replace(".csv", f"local_mod_{v}.csv") df_cut.to_csv(new_f, index_label="datetime") diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index a1ea4d0..8b51906 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -334,7 +334,19 @@ class DataHeader(object): 'depth_m': 'depth', 'date_dd_mmm_yy': 'date', 'time_gmt': 'time', - 'elev_m': 'elevation' + 'elev_m': 'elevation', + 'rh_10ft': 'relative_humidity_10ft', + 'bp_kpa_avg': 'barometric_pressure', + 'airtc_10ft_avg': 'air_temperature_10ft', + 'wsms_10ft_avg': 'wind_speed_10ft', + 'winddir_10ft_d1_wvt': 'wind_direction_10ft', + 'sup_avg': 'incoming_shortwave', + 'sdn_avg': 'outgoing_shortwave', + 'lupco_avg': 'incoming_longwave', + 'ldnco_avg': 'outgoing_longwave', + 'soil_moisture_20cm': 'soil_moisture_20cm', + 'soil_temperature_20cm': 'soil_temperature_20cm', + 'snowdepthfilter(m)': 'depth' } # Known possible profile types anything not in here will throw an error @@ -344,6 +356,10 @@ class DataHeader(object): 'specific_surface_area', 'equivalent_diameter', 'grain_size', 'hand_hardness', 'grain_type', 'manual_wetness', 'two_way_travel', 'depth', 'swe', + 'relative_humidity_10ft', 'barometric_pressure', + 'air_temperature_10ft', 'wind_speed_10ft', 'wind_direction_10ft', + 'incoming_shortwave', 'outgoing_shortwave', 'incoming_longwave', + 'outgoing_longwave', 'soil_moisture_20cm', 'soil_temperature_20cm' ] # Defaults to keywords arguments From 5eb92d4d2120ced5df4cc01751854f00866e8294 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 30 Jul 2024 16:10:17 -0600 Subject: [PATCH 24/30] Issue #20 - bump insitupy for new variables --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8238ad8..4a652f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,5 @@ progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 timezonefinder>=6.0,<7.0 -insitupy==0.1.0 +insitupy==0.1.1 metloom==0.6.1 From 690873fe8686e4cfbce26f4f8cfd8ef7a2e7a63e Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 30 Jul 2024 17:00:34 -0600 Subject: [PATCH 25/30] adjustments to variable mapping --- snowex_db/metadata.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 8b51906..49d0141 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -337,15 +337,15 @@ class DataHeader(object): 'elev_m': 'elevation', 'rh_10ft': 'relative_humidity_10ft', 'bp_kpa_avg': 'barometric_pressure', - 'airtc_10ft_avg': 'air_temperature_10ft', + 'airtc_10ft_avg': 'air_temp_10ft', 'wsms_10ft_avg': 'wind_speed_10ft', 'winddir_10ft_d1_wvt': 'wind_direction_10ft', 'sup_avg': 'incoming_shortwave', 'sdn_avg': 'outgoing_shortwave', 'lupco_avg': 'incoming_longwave', 'ldnco_avg': 'outgoing_longwave', - 'soil_moisture_20cm': 'soil_moisture_20cm', - 'soil_temperature_20cm': 'soil_temperature_20cm', + 'sm_20cm_avg': 'soil_moisture_20cm', + 'tc_20cm_avg': 'soil_temp_20cm', 'snowdepthfilter(m)': 'depth' } @@ -357,9 +357,9 @@ class DataHeader(object): 'grain_size', 'hand_hardness', 'grain_type', 'manual_wetness', 'two_way_travel', 'depth', 'swe', 'relative_humidity_10ft', 'barometric_pressure', - 'air_temperature_10ft', 
'wind_speed_10ft', 'wind_direction_10ft', + 'air_temp_10ft', 'wind_speed_10ft', 'wind_direction_10ft', 'incoming_shortwave', 'outgoing_shortwave', 'incoming_longwave', - 'outgoing_longwave', 'soil_moisture_20cm', 'soil_temperature_20cm' + 'outgoing_longwave', 'soil_moisture_20cm', 'soil_temp_20cm' ] # Defaults to keywords arguments @@ -389,6 +389,7 @@ def __init__(self, filename, **kwargs): kwargs: keyword values to pass to the database as metadata """ self.log = get_logger(__name__) + self._fname = filename self.extra_header = assign_default_kwargs( self, kwargs, self.defaults, leave=['epsg']) From 2d38b0a9cc46b616a9f35e2ed1e0bab6f1435f29 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 31 Jul 2024 10:58:58 -0600 Subject: [PATCH 26/30] bump insitupy for more depth mappings --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4a652f3..bece44e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,5 @@ progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 timezonefinder>=6.0,<7.0 -insitupy==0.1.1 +insitupy==0.1.2 metloom==0.6.1 From 356bfb35bdc71d24c2e18508d6cdbb441f07a144 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 31 Jul 2024 11:08:10 -0600 Subject: [PATCH 27/30] Remove Python 3.7 compatability --- .github/workflows/main.yml | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c457759..2667915 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.8, 3.9, "3.10"] services: diff --git a/setup.py b/setup.py index 0713dd3..44171fc 100644 --- a/setup.py +++ b/setup.py @@ -18,14 +18,14 @@ setup( author="Micah Johnson", - python_requires='>=3.7', + python_requires='>=3.8', classifiers=[ 'Development Status :: 2 - Pre-Alpha', 'Intended Audience :: Developers', 'Natural Language :: English', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', ], description="Software for building and managing a SnowEx PostGIS database", From 47ff2b5bfb11bf924a3787ec997323422b49e341 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 5 Aug 2024 14:03:42 -0600 Subject: [PATCH 28/30] fixing reqs in build --- docs/requirements.txt | 1 + requirements_dev.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 4d7fb78..d86dd2f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -6,3 +6,4 @@ pandoc==1.0.2 sphinxcontrib-apidoc==0.3.0 ipython==7.31.1 MarkupSafe<2.1.0 +jupyterlab==2.2.10 diff --git a/requirements_dev.txt b/requirements_dev.txt index faafada..b4b33eb 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,5 +8,4 @@ coverage==4.5.4 twine==1.14.0 pytest==6.2.3 pytest-runner==5.1 -jupyterlab==2.2.10 moto==3.1.11 From bba3285eba32ed756ebfc4c15fbb44380bde6f8c Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 5 Aug 2024 14:40:18 -0600 Subject: [PATCH 29/30] Fixing tests and build. 
SMP profile depths were not inverted --- snowex_db/metadata.py | 103 +++++++++++++++++++++++++++++++++++++-- snowex_db/upload.py | 9 +++- tests/test_batch.py | 5 +- tests/test_projection.py | 2 +- 4 files changed, 111 insertions(+), 8 deletions(-) diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 49d0141..c1a812e 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -6,6 +6,8 @@ from os.path import basename import pandas as pd from insitupy.campaigns.campaign import SnowExMetadataParser +from insitupy.campaigns.variables import SnowExProfileVariables, \ + MeasurementDescription from snowexsql.db import get_table_attributes from snowexsql.data import SiteData @@ -156,9 +158,14 @@ def _read(self, filename): str_cols = remap_data_names(str_cols, DataHeader.rename) dtype = {k: str for k in str_cols} - df = pd.read_csv(filename, header=header_pos, names=str_cols, - usecols=range(n_cols), encoding='latin', - parse_dates=[0], dtype=dtype) + df = pd.read_csv( + filename, header=header_pos, names=str_cols, + usecols=range(n_cols), encoding='latin', + # parse_dates=[0], + dtype=dtype + ) + # WHY IS THIS NEEDED? + df["date"] = pd.to_datetime(df["date"]) # Insure all values are 4 digits. Seems like some were not by accident df['fname_sufix'] = df['fname_sufix'].apply(lambda v: v.zfill(4)) @@ -264,6 +271,94 @@ def get_metadata(self, smp_file): return meta.iloc[0].to_dict() +class ExtendedSnowExProfileVariables(SnowExProfileVariables): + """ + Extend variables to add a few relevant ones + """ + DEPTH = MeasurementDescription( + "depth", "top or center depth of measurement", + [ + "depth", "top", "sample_top_height", "hs", + "depth_m", 'snowdepthfilter(m)', 'snowdepthfilter', + 'height' + ], True + ) + PERMITTIVITY = MeasurementDescription( + "permittivity", "Permittivity", + ["permittivity_a", "permittivity_b", "permittivity", + 'dielectric_constant', 'dielectric_constant_a', + 'dielectric_constant_b'] + ) + IGNORE = MeasurementDescription( + "ignore", "Ignore this", + ["original_index", 'id', 'freq_mhz', 'camera', 'avgvelocity'] + ) + SAMPLE_SIGNAL = MeasurementDescription( + 'sample_signal', "Sample Signal", + ['sample_signal'] + ) + FORCE = MeasurementDescription( + 'force', "Force", ["force"] + ) + REFLECTANCE = MeasurementDescription( + 'reflectance', "Reflectance", ['reflectance'] + ) + SSA = MeasurementDescription( + 'specific_surface_area', "Specific Surface Area", + ['specific_surface_area'] + ) + DATETIME = MeasurementDescription( + 'datetime', "Combined date and time", + ["Date/Local Standard Time", "date/local_standard_time", "datetime", + "date&time"], + True + ) + DATE = MeasurementDescription( + 'date', "Measurement Date (only date column)", + ['date_dd_mmm_yy', 'date'] + ) + TIME = MeasurementDescription( + 'time', "Measurement time", + ['time_gmt', 'time'] + ) + UTCYEAR = MeasurementDescription( + 'utcyear', "UTC Year", ['utcyear'] + ) + UTCDOY = MeasurementDescription( + 'utcdoy', "UTC day of year", ['utcdoy'] + ) + UTCTOD = MeasurementDescription( + 'utctod', 'UTC Time of Day', ['utctod'] + ) + ELEVATION = MeasurementDescription( + 'elevation', "Elevation", + ['elev_m', 'elevation'] + ) + EQUIPMENT = MeasurementDescription( + 'equipment', "Equipment", + ['equipment'] + ) + VERSION_NUMBER = MeasurementDescription( + 'version_number', "Version Number", + ['version_number'] + ) + NORTHING = MeasurementDescription( + 'northing', "UTM Northing", + ['northing', 'utm_wgs84_northing'] + ) + EASTING = MeasurementDescription( + 'easting', "UTM Easting", + ['easting', 
'utm_wgs84_easting'] + ) + + +class ExtendedSnowExMetadataParser(SnowExMetadataParser): + """ + Extend the parser to update the extended varaibles + """ + VARIABLES_CLASS = ExtendedSnowExProfileVariables + + class DataHeader(object): """ Class for managing information stored in files headers about a snow pit @@ -532,7 +627,7 @@ def _read(self, filename): read_csv """ - parser = SnowExMetadataParser( + parser = ExtendedSnowExMetadataParser( filename, timezone=self.in_timezone, header_sep=self.header_sep, allow_split_lines=self.allow_split_lines diff --git a/snowex_db/upload.py b/snowex_db/upload.py index 43097c0..b8315f0 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -110,7 +110,11 @@ def _read(self, profile_filename): depth_fmt = 'snow_height' is_smp = False - df = self._handle_force(df, profile_filename) + if 'force' in df.columns: + df = self._handle_force(df, profile_filename) + is_smp = True + # Make the data negative from snow surface + depth_fmt = 'surface_datum' if not df.empty: # Standardize all depth data @@ -173,7 +177,8 @@ def build_data(self, data_name): # Assign all meta data to every entry to the data frame for k, v in self.hdr.info.items(): - df[k] = v + if not pd.isna(v): + df[k] = v df['type'] = data_name df['date_accessed'] = self.date_accessed diff --git a/tests/test_batch.py b/tests/test_batch.py index 67352f9..38afaea 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -121,7 +121,10 @@ class TestUploadSMPBatch(TableTestBase): Test whether we can assign meta info from an smp log to 2 profiles """ args = [['S19M1013_5S21_20200201.CSV', 'S06M0874_2N12_20200131.CSV']] - kwargs = {'in_timezone': 'UTC', 'smp_log_f': 'smp_log.csv', 'units': 'Newtons'} + kwargs = { + 'in_timezone': 'UTC', + 'smp_log_f': 'smp_log.csv', + 'units': 'Newtons'} UploaderClass = UploadProfileBatch TableClass = LayerData attribute = 'depth' diff --git a/tests/test_projection.py b/tests/test_projection.py index 0512485..ea9863b 100644 --- a/tests/test_projection.py +++ b/tests/test_projection.py @@ -4,7 +4,7 @@ import pytest from geoalchemy2.shape import to_shape -from geoalchemy2.types import WKTElement +from geoalchemy2.elements import WKTElement from numpy.testing import assert_almost_equal from rasterio.crs import CRS From 620a18f0f5cb0194de78a591a21d7da62316f238 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Thu, 15 Aug 2024 10:53:16 -0600 Subject: [PATCH 30/30] Repeat script here --- .../add_timeseries_pits_bulk_properties.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 scripts/upload/add_timeseries_pits_bulk_properties.py diff --git a/scripts/upload/add_timeseries_pits_bulk_properties.py b/scripts/upload/add_timeseries_pits_bulk_properties.py deleted file mode 100644 index f2c6290..0000000 --- a/scripts/upload/add_timeseries_pits_bulk_properties.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Script to upload the Snowex Time Series pits -""" - -import glob -import re -from os.path import abspath, join -from pathlib import Path - -import pandas as pd - -from snowex_db.upload import PointDataCSV -from snowex_db import db_session - - -def main(): - """ - Add bulk SWE, Depth, Density for 2020 and 2021 timeseires pits - """ - db_name = 'localhost/snowex' - debug = True - - # Point to the downloaded data from - data_dir = abspath('../download/data/SNOWEX/') - error_msg = [] - - path_details = [ - { - "DOI": "https://doi.org/10.5067/KZ43HVLZV6G4", - "path": "SNEX20_TS_SP.002/2019.10.24/SNEX20_TS_SP_Summary_SWE_v02.csv" - }, - { - "DOI": 
"https://doi.org/10.5067/QIANJYJGRWOV", - "path": "SNEX21_TS_SP.001/2020.11.16/SNEX21_TS_SP_Summary_SWE_v01.csv" - } - ] - for info in path_details: - doi = info["DOI"] - file_path = join(data_dir, info["path"]) - # Read csv and dump new one without the extra header lines - df = pd.read_csv( - file_path, - skiprows=list(range(32)) + [33] - ) - new_name = file_path.replace(".csv", "_modified.csv") - # Filter to columns we want (density, swe, etc) - columns = [ - 'Location', 'Site', 'PitID', 'Date/Local Standard Time', 'UTM Zone', - 'Easting (m)', 'Northing (m)', 'Latitude (deg)', 'Longitude (deg)', - 'Density Mean (kg/m^3)', - 'SWE (mm)', 'HS (cm)', 'Flag' - ] - df = df.loc[:, columns] - df.to_csv(new_name, index=False) - - # Submit SWE file data as point data - with db_session( - db_name, credentials='credentials.json' - ) as (session, engine): - pcsv = PointDataCSV( - new_name, doi=doi, debug=debug, - depth_is_metadata=False, - row_based_crs=True, - row_based_timezone=True - ) - pcsv.submit(session) - - -if __name__ == '__main__': - main()