From a6e0b01e14bd06ea1e07a57912778c4cc8d6d637 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 12:28:32 -0600 Subject: [PATCH 01/30] Issue #18 - start of updating the datasource for 2020 timeseries pits and some todos inthe file --- ...{add_time_series_pits.py => add_time_series_pits_2020.py} | 5 +++++ 1 file changed, 5 insertions(+) rename scripts/upload/{add_time_series_pits.py => add_time_series_pits_2020.py} (92%) diff --git a/scripts/upload/add_time_series_pits.py b/scripts/upload/add_time_series_pits_2020.py similarity index 92% rename from scripts/upload/add_time_series_pits.py rename to scripts/upload/add_time_series_pits_2020.py index da272b9..652e058 100644 --- a/scripts/upload/add_time_series_pits.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -20,12 +20,17 @@ def main(): Currenltly based on the preliminary downloaded zip which has not been submitted yet. Folder name is SNEX20_TS_SP_preliminary_v4 """ + # TODO: write script to clear out the timeseries pits + # * maybe delete all pits and then add them back in + # TODO: fill in this DOI doi = None debug = True # Point to the downloaded data from + # TODO: update local path data_dir = abspath('../download/data/SNEX20_TS_SP_preliminary_v5/') # read in the descriptor file + # TODO: check this path desc_df = pd.read_csv(join(data_dir, 'SNEX20_TS_SP_Summary_Environment_v01.csv')) error_msg = [] From aeac5d5bbed5f0b25602cd78be4a292b3822ead8 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 12:34:43 -0600 Subject: [PATCH 02/30] new sources --- scripts/download/nsidc_sources.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/download/nsidc_sources.txt b/scripts/download/nsidc_sources.txt index 35d2b2e..43639ea 100644 --- a/scripts/download/nsidc_sources.txt +++ b/scripts/download/nsidc_sources.txt @@ -6,3 +6,5 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD.001/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD_TLI.001/2019.09.29/SNEX20_SD_TLI_clean.csv +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_TS_SP.002/ +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX21_TS_SP.001/ From be74fc2b02ce87c86176541c7bbd73b2deb2db80 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 13:31:43 -0600 Subject: [PATCH 03/30] issue #18 working towards modified 2020 timeseries pits upload script --- requirements.txt | 2 +- scripts/upload/add_time_series_pits_2020.py | 143 +++++++++++++------- snowex_db/__init__.py | 13 ++ 3 files changed, 107 insertions(+), 51 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9528b37..38900b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ wheel>0.34.0, <0.35.0 -snowexsql>=0.3.0, <0.4.0 +snowexsql>=0.4.1, <0.5.0 snowmicropyn matplotlib>=3.2.2, <3.3.0 moto==3.1.11 diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index 652e058..529fa35 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -3,12 +3,14 @@ """ import glob +import re from os.path import abspath, join -import pandas as pd +from pathlib import Path from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch from snowex_db.upload import PointDataCSV -from snowexsql.db import get_db +from snowex_db import db_session + tz_map = {'US/Pacific': ['CA', 'NV', 
'WA'], 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], @@ -22,59 +24,100 @@ def main(): """ # TODO: write script to clear out the timeseries pits # * maybe delete all pits and then add them back in - # TODO: fill in this DOI - doi = None + # Version 2 DOI + # https://nsidc.org/data/snex20_ts_sp/versions/2 + doi = "https://doi.org/10.5067/KZ43HVLZV6G4" debug = True + # TODO: new header of + # Pit Comments + # Parameter Codes + # Point to the downloaded data from - # TODO: update local path - data_dir = abspath('../download/data/SNEX20_TS_SP_preliminary_v5/') - # read in the descriptor file - # TODO: check this path - desc_df = pd.read_csv(join(data_dir, 'SNEX20_TS_SP_Summary_Environment_v01.csv')) + data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') error_msg = [] - # get unique site_ids - site_ids = desc_df['PitID'].unique() - - for site_id in site_ids: - abbrev = site_id[0:2] - tz = [k for k, states in tz_map.items() if abbrev in states][0] - - # Grab all the csvs in the pits folder - filenames = glob.glob(join(data_dir, 'pits', f'{site_id}*/*.csv')) - - # Grab all the site details files - sites = glob.glob(join(data_dir, 'pits', f'{site_id}*/*site*.csv')) - - # Grab all the perimeter depths and remove them for now. - perimeter_depths = glob.glob(join(data_dir, 'pits', f'{site_id}*/*perimeter*.csv')) - - # Remove the site details from the total file list to get only the - profiles = list(set(filenames) - set(sites) - set(perimeter_depths)) - - # Submit all profiles associated with pit at a time - b = UploadProfileBatch( - filenames=profiles, - debug=debug, doi=doi, - in_timezone=tz) - b.push() - error_msg += b.errors - - # Upload the site details - sd = UploadSiteDetailsBatch(filenames=sites, - debug=debug, - doi=doi, - in_timezone=tz) - sd.push() - error_msg += sd.errors - - # Submit all perimeters as point data - engine, session = get_db('localhost/snowex', credentials='credentials.json') - for fp in perimeter_depths: - pcsv = PointDataCSV(fp, doi=doi, debug=debug, depth_is_metadata=False, in_timezone=tz) - pcsv.submit(session) - session.close() + # OLD file name + # pits/CAAMCL_20191220_1300/SNEX20_TS_SP_20191220_1300_CAAMCL_LWC_v01.csv + # NEW FILE NAME + # 2019.10.24/SNEX20_TS_SP_20191024_1322_COFEJ2_data_LWC_v02.csv + + # Get all the date folders + unique_dt_olders = Path(data_dir).glob("20*.*.*") + for udf in unique_dt_olders: + # get all the csvs in the folder + dt_folder_files = list(udf.glob("*.csv")) + all_file_names = [f.name for f in dt_folder_files] + site_ids = [] + # Get the unique site ids for this date folder + compiled = re.compile( + r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv' + ) + for file_name in all_file_names: + match = compiled.match(file_name) + if match: + code = match.group(1) + site_ids.append(code) + else: + raise RuntimeError(f"No site ID found for {file_name}") + + # Get the unique site ids + site_ids = list(set(site_ids)) + + for site_id in site_ids: + abbrev = site_id[0:2] + tz = [k for k, states in tz_map.items() if abbrev in states][0] + + # Grab all the csvs in the pits folder + filenames = glob.glob(join(str(udf), f'*_{site_id}_*.csv')) + + # Grab all the site details files + sites = glob.glob(join( + data_dir, 'pits', f'*_{site_id}_*siteDetails*.csv' + )) + + # Grab all the perimeter depths and remove them for now. 
+ perimeter_depths = glob.glob(join( + data_dir, 'pits', f'*_{site_id}_*perimeterDepths*.csv' + )) + + # all non-gapped filled_density + non_filled_density = glob.glob(join( + data_dir, 'pits', f'*_{site_id}_*_density_*.csv' + )) + + # Remove the site details from the total file list to get only the + profiles = list( + set(filenames) - set(sites) - set(perimeter_depths) - + set(non_filled_density) # remove non-gap-filled denisty + ) + + # Submit all profiles associated with pit at a time + b = UploadProfileBatch( + filenames=profiles, + debug=debug, doi=doi, + in_timezone=tz) + b.push() + error_msg += b.errors + + # Upload the site details + sd = UploadSiteDetailsBatch(filenames=sites, + debug=debug, + doi=doi, + in_timezone=tz) + sd.push() + error_msg += sd.errors + + # Submit all perimeters as point data + with db_session( + 'localhost/snowex', credentials='credentials.json' + ) as session: + for fp in perimeter_depths: + pcsv = PointDataCSV( + fp, doi=doi, debug=debug, depth_is_metadata=False, + in_timezone=tz + ) + pcsv.submit(session) for f, m in error_msg: print(f) diff --git a/snowex_db/__init__.py b/snowex_db/__init__.py index 5f4adc5..5820abe 100644 --- a/snowex_db/__init__.py +++ b/snowex_db/__init__.py @@ -2,3 +2,16 @@ __author__ = """Micah Johnson""" __version__ = '0.1.0' + +from snowexsql.db import get_db +from snowexsql.api import DB_NAME +from contextlib import contextmanager + + +@contextmanager +def db_session(db_name, credentials): + # use default_name + db_name = db_name or DB_NAME + engine, session = get_db(db_name, credentials=credentials) + yield session, engine + session.close() From 8372291f092ebc79ba0b3d03afa7d0162cf32765 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 13:34:17 -0600 Subject: [PATCH 04/30] path logic --- scripts/upload/add_time_series_pits_2020.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index 529fa35..de94db5 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -43,7 +43,9 @@ def main(): # 2019.10.24/SNEX20_TS_SP_20191024_1322_COFEJ2_data_LWC_v02.csv # Get all the date folders - unique_dt_olders = Path(data_dir).glob("20*.*.*") + unique_dt_olders = Path( + data_dir + ).expanduser().absolute().glob("20*.*.*") for udf in unique_dt_olders: # get all the csvs in the folder dt_folder_files = list(udf.glob("*.csv")) @@ -73,17 +75,17 @@ def main(): # Grab all the site details files sites = glob.glob(join( - data_dir, 'pits', f'*_{site_id}_*siteDetails*.csv' + str(udf), f'*_{site_id}_*siteDetails*.csv' )) # Grab all the perimeter depths and remove them for now. 
perimeter_depths = glob.glob(join( - data_dir, 'pits', f'*_{site_id}_*perimeterDepths*.csv' + str(udf), f'*_{site_id}_*perimeterDepths*.csv' )) # all non-gapped filled_density non_filled_density = glob.glob(join( - data_dir, 'pits', f'*_{site_id}_*_density_*.csv' + str(udf), f'*_{site_id}_*_density_*.csv' )) # Remove the site details from the total file list to get only the From 3050ce798c2e2c0ef88a505b6d0c9122ad7d215f Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 14:02:32 -0600 Subject: [PATCH 05/30] make sure to not use gap filled density at this point --- scripts/upload/add_time_series_pits_2020.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index de94db5..b98e961 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -37,11 +37,6 @@ def main(): data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') error_msg = [] - # OLD file name - # pits/CAAMCL_20191220_1300/SNEX20_TS_SP_20191220_1300_CAAMCL_LWC_v01.csv - # NEW FILE NAME - # 2019.10.24/SNEX20_TS_SP_20191024_1322_COFEJ2_data_LWC_v02.csv - # Get all the date folders unique_dt_olders = Path( data_dir @@ -84,14 +79,14 @@ def main(): )) # all non-gapped filled_density - non_filled_density = glob.glob(join( - str(udf), f'*_{site_id}_*_density_*.csv' + gap_filled_density = glob.glob(join( + str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' )) # Remove the site details from the total file list to get only the profiles = list( set(filenames) - set(sites) - set(perimeter_depths) - - set(non_filled_density) # remove non-gap-filled denisty + set(gap_filled_density) # remove gap-filled denisty ) # Submit all profiles associated with pit at a time From d852f9857cf32c63a05fdcb63fdc304ee0904226 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 14:09:16 -0600 Subject: [PATCH 06/30] Issue #18 - file for 2021 timeseries pits --- scripts/upload/add_time_series_pits_2021.py | 124 ++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 scripts/upload/add_time_series_pits_2021.py diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py new file mode 100644 index 0000000..c5356d8 --- /dev/null +++ b/scripts/upload/add_time_series_pits_2021.py @@ -0,0 +1,124 @@ +""" +Script to upload the Snowex Time Series pits +""" + +import glob +import re +from os.path import abspath, join +from pathlib import Path + +from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch +from snowex_db.upload import PointDataCSV +from snowex_db import db_session + + +tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], + 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], + } + + +def main(): + """ + Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
+ Folder name is SNEX20_TS_SP_preliminary_v4 + """ + # TODO: write script to clear out the timeseries pits + # * maybe delete all pits and then add them back in + # https://nsidc.org/data/snex21_ts_sp/versions/1 + doi = "https://doi.org/10.5067/QIANJYJGRWOV" + debug = True + + # TODO: new header of + # Pit Comments + # Parameter Codes + + # Point to the downloaded data from + data_dir = abspath('../download/data/SNOWEX/SNEX21_TS_SP.001/') + error_msg = [] + + # Get all the date folders + unique_dt_olders = Path( + data_dir + ).expanduser().absolute().glob("20*.*.*") + for udf in unique_dt_olders: + # get all the csvs in the folder + dt_folder_files = list(udf.glob("*.csv")) + all_file_names = [f.name for f in dt_folder_files] + site_ids = [] + # Get the unique site ids for this date folder + compiled = re.compile( + r'SNEX21_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' + ) + for file_name in all_file_names: + match = compiled.match(file_name) + if match: + code = match.group(1) + site_ids.append(code) + else: + raise RuntimeError(f"No site ID found for {file_name}") + + # Get the unique site ids + site_ids = list(set(site_ids)) + + for site_id in site_ids: + abbrev = site_id[0:2] + tz = [k for k, states in tz_map.items() if abbrev in states][0] + + # Grab all the csvs in the pits folder + filenames = glob.glob(join(str(udf), f'*_{site_id}_*.csv')) + + # Grab all the site details files + sites = glob.glob(join( + str(udf), f'*_{site_id}_*siteDetails*.csv' + )) + + # Grab all the perimeter depths and remove them for now. + perimeter_depths = glob.glob(join( + str(udf), f'*_{site_id}_*perimeterDepths*.csv' + )) + + # all non-gapped filled_density + gap_filled_density = glob.glob(join( + str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' + )) + + # Remove the site details from the total file list to get only the + profiles = list( + set(filenames) - set(sites) - set(perimeter_depths) - + set(gap_filled_density) # remove gap-filled denisty + ) + + # Submit all profiles associated with pit at a time + b = UploadProfileBatch( + filenames=profiles, + debug=debug, doi=doi, + in_timezone=tz) + b.push() + error_msg += b.errors + + # Upload the site details + sd = UploadSiteDetailsBatch(filenames=sites, + debug=debug, + doi=doi, + in_timezone=tz) + sd.push() + error_msg += sd.errors + + # Submit all perimeters as point data + with db_session( + 'localhost/snowex', credentials='credentials.json' + ) as session: + for fp in perimeter_depths: + pcsv = PointDataCSV( + fp, doi=doi, debug=debug, depth_is_metadata=False, + in_timezone=tz + ) + pcsv.submit(session) + + for f, m in error_msg: + print(f) + return len(error_msg) + + +if __name__ == '__main__': + main() From 624c10bf76ff2d77c2114581700b1a6c82268496 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 14:10:51 -0600 Subject: [PATCH 07/30] Issue #18 no perimeter depth files for 2021 TS pits --- scripts/upload/add_time_series_pits_2021.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py index c5356d8..8e2e721 100644 --- a/scripts/upload/add_time_series_pits_2021.py +++ b/scripts/upload/add_time_series_pits_2021.py @@ -72,11 +72,6 @@ def main(): str(udf), f'*_{site_id}_*siteDetails*.csv' )) - # Grab all the perimeter depths and remove them for now. 
- perimeter_depths = glob.glob(join( - str(udf), f'*_{site_id}_*perimeterDepths*.csv' - )) - # all non-gapped filled_density gap_filled_density = glob.glob(join( str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' @@ -84,7 +79,7 @@ def main(): # Remove the site details from the total file list to get only the profiles = list( - set(filenames) - set(sites) - set(perimeter_depths) - + set(filenames) - set(sites) - set(gap_filled_density) # remove gap-filled denisty ) @@ -104,17 +99,6 @@ def main(): sd.push() error_msg += sd.errors - # Submit all perimeters as point data - with db_session( - 'localhost/snowex', credentials='credentials.json' - ) as session: - for fp in perimeter_depths: - pcsv = PointDataCSV( - fp, doi=doi, debug=debug, depth_is_metadata=False, - in_timezone=tz - ) - pcsv.submit(session) - for f, m in error_msg: print(f) return len(error_msg) From 4863e72382097ae2087adab92f324fe5be56246c Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 3 Jul 2024 15:16:44 -0600 Subject: [PATCH 08/30] having issues creating the test database --- .gitignore | 2 ++ scripts/upload/create.py | 10 ++++++---- snowex_db/upload.py | 1 - 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 49440f6..4febbca 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ scripts/upload/test*.txt .idea/* scripts/download/data/* venv/ + +credentials.json \ No newline at end of file diff --git a/scripts/upload/create.py b/scripts/upload/create.py index 8f737c6..c24442d 100644 --- a/scripts/upload/create.py +++ b/scripts/upload/create.py @@ -23,9 +23,10 @@ def main(overwrite=False, db='snowex', credentials='./credentials.json'): initialize(engine) log.warning('Database cleared!\n') try: - sql = "CREATE USER snow WITH PASSWORD 'hackweek';" - engine.execute(sql) - engine.execute("GRANT USAGE ON SCHEMA public TO snow;") + with engine.connect() as connection: + connection.execute("CREATE USER snow WITH PASSWORD 'hackweek';") + with engine.connect() as connection: + connection.execute("GRANT USAGE ON SCHEMA public TO snow;") except Exception as e: print(e) @@ -33,7 +34,8 @@ def main(overwrite=False, db='snowex', credentials='./credentials.json'): sql = f'GRANT SELECT ON {t} TO snow;' log.info(f'Adding read only permissions for table {t}...') - engine.execute(sql) + with engine.connect() as connection: + connection.execute(sql) else: log.warning('Aborted. 
Database has not been modified.\n') diff --git a/snowex_db/upload.py b/snowex_db/upload.py index ca9467e..a6e376a 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -6,7 +6,6 @@ from subprocess import STDOUT, check_output from pathlib import Path import pandas as pd -import progressbar from geoalchemy2.elements import RasterElement, WKTElement from os.path import basename, exists, join from os import makedirs, remove From a9065bfc03a0c6f5d480e819078e9a3cac772116 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 8 Jul 2024 14:22:21 -0600 Subject: [PATCH 09/30] Modify create script for sqlalchemy>2.0 --- scripts/upload/create.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/scripts/upload/create.py b/scripts/upload/create.py index c24442d..0733819 100644 --- a/scripts/upload/create.py +++ b/scripts/upload/create.py @@ -3,6 +3,7 @@ """ from snowexsql.db import get_db, initialize from snowex_db.utilities import get_logger +from sqlalchemy import text as sqltext import argparse @@ -24,18 +25,25 @@ def main(overwrite=False, db='snowex', credentials='./credentials.json'): log.warning('Database cleared!\n') try: with engine.connect() as connection: - connection.execute("CREATE USER snow WITH PASSWORD 'hackweek';") - with engine.connect() as connection: - connection.execute("GRANT USAGE ON SCHEMA public TO snow;") + # Autocommit so the user is created before granting access + connection = connection.execution_options( + isolation_level="AUTOCOMMIT") + connection.execute( + sqltext("CREATE USER snow WITH PASSWORD 'hackweek';") + ) + connection.execute( + sqltext("GRANT USAGE ON SCHEMA public TO snow;") + ) except Exception as e: - print(e) + log.error("Failed on user creation") + raise e for t in ['sites', 'points', 'layers', 'images']: sql = f'GRANT SELECT ON {t} TO snow;' log.info(f'Adding read only permissions for table {t}...') with engine.connect() as connection: - connection.execute(sql) + connection.execute(sqltext(sql)) else: log.warning('Aborted. Database has not been modified.\n') From 1d29427493d5bcfcb73a59f50459aece10dc253b Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 8 Jul 2024 15:43:38 -0600 Subject: [PATCH 10/30] Switch to 2020 V1 pits - there are some data format and header issues in the V2 data --- scripts/upload/add_time_series_pits_2020.py | 42 +++++++++++++-------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index b98e961..ac53824 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -22,11 +22,12 @@ def main(): Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
Folder name is SNEX20_TS_SP_preliminary_v4 """ + db_name = 'localhost/snowex' # TODO: write script to clear out the timeseries pits # * maybe delete all pits and then add them back in - # Version 2 DOI - # https://nsidc.org/data/snex20_ts_sp/versions/2 - doi = "https://doi.org/10.5067/KZ43HVLZV6G4" + # Version 1 DOI + # https://nsidc.org/data/snex20_ts_sp/versions/1 + doi = "https://doi.org/10.5067/POT9E0FFUUD1" debug = True # TODO: new header of @@ -34,9 +35,15 @@ def main(): # Parameter Codes # Point to the downloaded data from - data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') + data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.001/') error_msg = [] + # Files to ignore + ignore_files = [ + "SNEX20_TS_SP_Summary_Environment_v01.csv", + "SNEX20_TS_SP_Summary_SWE_v01.csv" + ] + # Get all the date folders unique_dt_olders = Path( data_dir @@ -44,13 +51,16 @@ def main(): for udf in unique_dt_olders: # get all the csvs in the folder dt_folder_files = list(udf.glob("*.csv")) - all_file_names = [f.name for f in dt_folder_files] site_ids = [] # Get the unique site ids for this date folder compiled = re.compile( - r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv' + r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' ) - for file_name in all_file_names: + for file_path in dt_folder_files: + file_name = file_path.name + if file_name in ignore_files: + print(f"Skipping {file_name}") + continue match = compiled.match(file_name) if match: code = match.group(1) @@ -91,24 +101,24 @@ def main(): # Submit all profiles associated with pit at a time b = UploadProfileBatch( - filenames=profiles, - debug=debug, doi=doi, - in_timezone=tz) + filenames=profiles, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) b.push() error_msg += b.errors # Upload the site details - sd = UploadSiteDetailsBatch(filenames=sites, - debug=debug, - doi=doi, - in_timezone=tz) + sd = UploadSiteDetailsBatch( + filenames=sites, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) sd.push() error_msg += sd.errors # Submit all perimeters as point data with db_session( - 'localhost/snowex', credentials='credentials.json' - ) as session: + db_name, credentials='credentials.json' + ) as (session, engine): for fp in perimeter_depths: pcsv = PointDataCSV( fp, doi=doi, debug=debug, depth_is_metadata=False, From c2c3e0046854739131b5711afd5c2a69ac7b882c Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 8 Jul 2024 15:43:49 -0600 Subject: [PATCH 11/30] Use db_session function --- snowex_db/batch.py | 20 ++++++++++---------- snowex_db/projection.py | 5 +++-- snowex_db/upload.py | 12 ++++++++---- 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/snowex_db/batch.py b/snowex_db/batch.py index 864aaca..fc547f0 100644 --- a/snowex_db/batch.py +++ b/snowex_db/batch.py @@ -7,7 +7,7 @@ import time from os.path import abspath, basename, expanduser, join -from snowexsql.db import get_db +from snowex_db import db_session from snowex_db.interpretation import get_InSar_flight_comment from snowex_db.metadata import (DataHeader, SMPMeasurementLog, read_InSar_annotation) @@ -71,10 +71,6 @@ def __init__(self, filenames, **kwargs): self.errors = [] self.uploaded = 0 - # Grab db using credentials - self.log.info('Accessing Database {}'.format(self.db_name)) - engine, self.session = get_db(self.db_name, credentials=self.credentials) - self.log.info('Preparing to upload {} files...'.format(len(filenames))) def push(self): @@ -111,8 +107,6 @@ def push(self): else: self._push_one(f, 
**self.meta) - self.session.close() - # Log the ending errors self.report(i + 1) @@ -127,7 +121,9 @@ def _push_one(self, f, **kwargs): d = self.UploaderClass(f, **kwargs) # Submit the data to the database - d.submit(self.session) + self.log.info('Accessing Database {}'.format(self.db_name)) + with db_session(self.db_name, self.credentials) as (session, engine): + d.submit(session) self.uploaded += 1 def report(self, files_attempted): @@ -151,7 +147,6 @@ def report(self, files_attempted): self.log.info('Finished! Elapsed {:d}s\n'.format( int(time.time() - self.start))) - self.session.close() class UploadSiteDetailsBatch(BatchBase): @@ -325,7 +320,12 @@ def _push_one(self, f, **kwargs): d = self.UploaderClass(r, **meta) # Submit the data to the database - d.submit(self.session) + # Grab db using credentials + self.log.info('Accessing Database {}'.format(self.db_name)) + with db_session( + self.db_name, self.credentials + ) as (session, engine): + d.submit(session) # Uploaded set self.uploaded += 1 diff --git a/snowex_db/projection.py b/snowex_db/projection.py index 75ac76e..256b18e 100644 --- a/snowex_db/projection.py +++ b/snowex_db/projection.py @@ -39,8 +39,9 @@ def reproject_point_in_dict(info, is_northern=True, zone_number=None): easting, northing, utm_zone, letter = utm.from_latlon( result['latitude'], result['longitude'], force_zone_number=zone_number) - result['easting'] = easting - result['northing'] = northing + # String representation should not be np.float64, so cast to float + result['easting'] = float(easting) + result['northing'] = float(northing) result['utm_zone'] = utm_zone # Secondarily use the utm to add lat long diff --git a/snowex_db/upload.py b/snowex_db/upload.py index a6e376a..ee342b7 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -62,10 +62,14 @@ def _read(self, profile_filename): df: pd.dataframe contain csv data with standardized column names """ # header=0 because docs say to if using skip rows and columns - df = pd.read_csv(profile_filename, header=0, - skiprows=self.hdr.header_pos, - names=self.hdr.columns, - encoding='latin') + try: + df = pd.read_csv( + profile_filename, header=0, skiprows=self.hdr.header_pos, + names=self.hdr.columns, encoding='latin' + ) + except pd.errors.ParserError as e: + LOG.error(e) + raise RuntimeError(f"Failed reading {profile_filename}") # Special SMP specific tasks depth_fmt = 'snow_height' From 07864cb66e4698de0d8e2025f533e12f6e94b9e1 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 9 Jul 2024 11:56:13 -0600 Subject: [PATCH 12/30] Slight tweaks to 2021 timeseries script --- scripts/upload/add_time_series_pits_2021.py | 26 +++++++-------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py index 8e2e721..2faaf14 100644 --- a/scripts/upload/add_time_series_pits_2021.py +++ b/scripts/upload/add_time_series_pits_2021.py @@ -8,8 +8,6 @@ from pathlib import Path from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch -from snowex_db.upload import PointDataCSV -from snowex_db import db_session tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], @@ -19,19 +17,13 @@ def main(): """ - Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
- Folder name is SNEX20_TS_SP_preliminary_v4 + Snowex 2021 timeseries pits """ - # TODO: write script to clear out the timeseries pits - # * maybe delete all pits and then add them back in + db_name = 'localhost/snowex' # https://nsidc.org/data/snex21_ts_sp/versions/1 doi = "https://doi.org/10.5067/QIANJYJGRWOV" debug = True - # TODO: new header of - # Pit Comments - # Parameter Codes - # Point to the downloaded data from data_dir = abspath('../download/data/SNOWEX/SNEX21_TS_SP.001/') error_msg = [] @@ -85,17 +77,17 @@ def main(): # Submit all profiles associated with pit at a time b = UploadProfileBatch( - filenames=profiles, - debug=debug, doi=doi, - in_timezone=tz) + filenames=profiles, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) b.push() error_msg += b.errors # Upload the site details - sd = UploadSiteDetailsBatch(filenames=sites, - debug=debug, - doi=doi, - in_timezone=tz) + sd = UploadSiteDetailsBatch( + filenames=sites, debug=debug, doi=doi, in_timezone=tz, + db_name=db_name + ) sd.push() error_msg += sd.errors From 9ec87fb0f95945a43c6598341d9b4aa1e7b220e4 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 10 Jul 2024 12:26:33 -0600 Subject: [PATCH 13/30] Script to delete pits --- scripts/remove_data/remove_pits.py | 68 ++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 scripts/remove_data/remove_pits.py diff --git a/scripts/remove_data/remove_pits.py b/scripts/remove_data/remove_pits.py new file mode 100644 index 0000000..99b6160 --- /dev/null +++ b/scripts/remove_data/remove_pits.py @@ -0,0 +1,68 @@ +""" +File to remove all snowpits from the database +""" +import argparse +from snowexsql.api import db_session +from snowexsql.data import LayerData +from snowexsql.db import get_db + + +def main(): + parser = argparse.ArgumentParser( + description='Script to create our databases using the python library') + parser.add_argument('--db', dest='db', default='snowex', + help='Name of the database locally to add tables to') + parser.add_argument('--dry_run', dest='dry_run', action='store_true', + help='Try a dry run or not') + parser.add_argument('--credentials', dest='credentials', + default='./credentials.json', + help='Past to a json containing') + args = parser.parse_args() + + credentials = args.credentials + db_name = f'localhost/{args.db}' + dry_run = args.dry_run + + # All measurement 'types' associate with pits + types_pit = [ + 'sample_signal', 'grain_size', 'density', 'reflectance', + 'permittivity', 'lwc_vol', 'manual_wetness', + 'equivalent_diameter', 'specific_surface_area', 'grain_type', + 'temperature', 'hand_hardness' + ] + # Start a session + engine, session = get_db(db_name, credentials=credentials) + print(f"Connected to {db_name}") + try: + q = session.query(LayerData).filter( + LayerData.pit_id is not None # Filter to results with pit id + ).filter( + LayerData.type.in_(types_pit) # Filter to correct type + ) + result = q.count() + # Rough count of pits + estimated_number = int(result / float(len(types_pit)) / 10.0) + print(f"Found {result} records") + print(f"This is roughly {estimated_number} pits") + if dry_run: + print("THIS IS A DRYRUN, not deleting") + else: + if result > 0: + print("Deleting pits from the database") + # Delete + q.delete() + session.commit() + else: + print("No results, nothing to delete") + session.close() + except Exception as e: + print("Errored out, rolling back") + print(e) + session.rollback() + raise e + + print("Done") + + +if __name__ == '__main__': + main() From 
90ed14d76e4836f01394b648b79d421f2c339977 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 13:02:44 -0600 Subject: [PATCH 14/30] start using insitupy for metadata handling --- requirements.txt | 3 +- snowex_db/metadata.py | 114 +++++++----------------------------------- 2 files changed, 19 insertions(+), 98 deletions(-) diff --git a/requirements.txt b/requirements.txt index 38900b2..0c5d68e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,10 @@ wheel>0.34.0, <0.35.0 snowexsql>=0.4.1, <0.5.0 snowmicropyn -matplotlib>=3.2.2, <3.3.0 +matplotlib>=3.2.2 moto==3.1.11 coloredlogs>=14.0 progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 +insitupy==0.1.0 diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 09217ec..d2e276c 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -5,10 +5,10 @@ from os.path import basename import pandas as pd - +from insitupy.campaigns.metadata import MetaDataParser from snowexsql.db import get_table_attributes - from snowexsql.data import SiteData + from .interpretation import * from .projection import add_geom, reproject_point_in_dict from .string_management import * @@ -429,78 +429,6 @@ def rename_sample_profiles(self, columns, data_names): result.append(c) return result - def parse_column_names(self, lines): - """ - A flexible mnethod that attempts to find and standardize column names - for csv data. Looks for a comma separated line with N entries == to the - last line in the file. If an entry is found with more commas than the - last line then we use that. This allows us to have data that doesn't - have all the commas in the data (SSA typically missing the comma for - veg unless it was notable) - - Assumptions: - - 1. The last line in file is of representative csv data - - 2. 
The header is the last column that has more chars than numbers - - Args: - lines: Complete list of strings from the file - - Returns: - columns: list of column names - """ - - # Minimum column size should match the last line of data (Assumption - # #2) - n_columns = len(lines[-1].split(',')) - - # Use these to monitor if a larger column count is found - header_pos = 0 - if lines[0][0] == '#': - header_indicator = '#' - else: - header_indicator = None - - for i, l in enumerate(lines): - if i == 0: - previous = get_alpha_ratio(lines[i]) - else: - previous = get_alpha_ratio(lines[i - 1]) - - if line_is_header(l, expected_columns=n_columns, - header_indicator=header_indicator, - previous_alpha_ratio=previous): - header_pos = i - - if i > header_pos: - break - - self.log.debug('Found end of header at line {}...'.format(header_pos)) - - # Parse the columns header based on the size of the last line - str_line = lines[header_pos] - # Remove units - for c in ['()', '[]']: - str_line = strip_encapsulated(str_line, c) - - raw_cols = str_line.strip('#').split(',') - standard_cols = [standardize_key(c) for c in raw_cols] - - # Rename any column names to more standard ones - columns = remap_data_names(standard_cols, self.rename) - - # Determine the profile type - (self.data_names, self.multi_sample_profiles) = \ - self.determine_data_names(columns) - - self.data_names = remap_data_names(self.data_names, self.rename) - - if self.multi_sample_profiles: - columns = self.rename_sample_profiles(columns, self.data_names) - - return columns, header_pos - def determine_data_names(self, raw_columns): """ Determine the names of the data to be uploaded from the raw column @@ -574,33 +502,25 @@ def _read(self, filename): read_csv """ - with open(filename, encoding='latin') as fp: - lines = fp.readlines() - fp.close() - - # Site description files have no need for column lists - if 'site' in filename.lower(): - self.log.info('Parsing site description header...') - columns = None - header_pos = None - - # Site location parses all of the file - - # Find the column names and where it is in the file - else: - columns, header_pos = self.parse_column_names(lines) - self.log.debug('Column Data found to be {} columns based on Line ' - '{}'.format(len(columns), header_pos)) + parser = MetaDataParser( + filename, timezone=self.in_timezone, + header_sep=self.header_sep + ) + str_data, columns, header_pos = parser.find_header_info() + # Determine the profile type + (self.data_names, self.multi_sample_profiles) = \ + self.determine_data_names(columns) - # Only parse what we know if the header - lines = lines[0:header_pos] + self.data_names = remap_data_names(self.data_names, self.rename) - # Clean up the lines from line returns to grab header info - lines = [ln.strip() for ln in lines] - str_data = " ".join(lines).split('#') + if self.multi_sample_profiles: + columns = self.rename_sample_profiles(columns, self.data_names) + self.log.debug('Column Data found to be {} columns based on Line ' + '{}'.format(len(columns), header_pos)) # Keep track of the number of lines with # in it for data opening - self.length = len(str_data) + # TODO: what do we do here? + # self.length = len(str_data) # Key value pairs are separate by some separator provided. 
data = {} From f949f72e86438bd9818abf777bfe4ae3288a097a Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 14:26:48 -0600 Subject: [PATCH 15/30] working through handling metadata --- snowex_db/interpretation.py | 29 +++++++++++++++++++++-------- snowex_db/metadata.py | 37 +++++++++++++++++++++---------------- 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/snowex_db/interpretation.py b/snowex_db/interpretation.py index 34928c1..d8de691 100644 --- a/snowex_db/interpretation.py +++ b/snowex_db/interpretation.py @@ -100,6 +100,14 @@ def manage_aspect(info): return info +def is_number(s): + try: + float(s) # Try to convert the string to a float + return True + except ValueError: + return False + + def convert_cardinal_to_degree(cardinal): """ Converts cardinal directions to degrees. Also removes any / or - that @@ -136,16 +144,21 @@ def convert_cardinal_to_degree(cardinal): # Manage extra characters separating composite dirs, make it all upper case d = ''.join([c.upper() for c in cardinal if c not in '/-']) - # Assume West, East, South, Or North - if len(d) > 3: - d = d[0] - warnings.warn("Assuming {} is {}".format(cardinal, d)) + # Go straight to degrees if numeric + if is_number(d): + degrees = float(d) - if d in dirs: - i = dirs.index(d) - degrees = i * (360. / len(dirs)) else: - raise ValueError('Invalid cardinal direction {}!'.format(cardinal)) + # Assume West, East, South, Or North + if len(d) > 3: + d = d[0] + warnings.warn("Assuming {} is {}".format(cardinal, d)) + + if d in dirs: + i = dirs.index(d) + degrees = i * (360. / len(dirs)) + else: + raise ValueError('Invalid cardinal direction {}!'.format(cardinal)) return degrees diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index d2e276c..387eece 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -5,7 +5,7 @@ from os.path import basename import pandas as pd -from insitupy.campaigns.metadata import MetaDataParser +from insitupy.campaigns.campaign import SnowExMetadataParser from snowexsql.db import get_table_attributes from snowexsql.data import SiteData @@ -350,7 +350,9 @@ class DataHeader(object): 'epsg': None, 'header_sep': ',', 'northern_hemisphere': True, - 'depth_is_metadata': True} + 'depth_is_metadata': True, + 'allow_split_lines': False + } def __init__(self, filename, **kwargs): """ @@ -502,25 +504,28 @@ def _read(self, filename): read_csv """ - parser = MetaDataParser( + parser = SnowExMetadataParser( filename, timezone=self.in_timezone, - header_sep=self.header_sep + header_sep=self.header_sep, + allow_split_lines=self.allow_split_lines ) - str_data, columns, header_pos = parser.find_header_info() - # Determine the profile type - (self.data_names, self.multi_sample_profiles) = \ - self.determine_data_names(columns) + str_data, standard_cols, header_pos = parser.find_header_info() - self.data_names = remap_data_names(self.data_names, self.rename) + if standard_cols is not None: + # handle name remapping + columns = remap_data_names(standard_cols, self.rename) + # Determine the profile type + (self.data_names, self.multi_sample_profiles) = \ + self.determine_data_names(columns) - if self.multi_sample_profiles: - columns = self.rename_sample_profiles(columns, self.data_names) - self.log.debug('Column Data found to be {} columns based on Line ' - '{}'.format(len(columns), header_pos)) + self.data_names = remap_data_names(self.data_names, self.rename) - # Keep track of the number of lines with # in it for data opening - # TODO: what do we do here? 
- # self.length = len(str_data) + if self.multi_sample_profiles: + columns = self.rename_sample_profiles(columns, self.data_names) + self.log.debug('Column Data found to be {} columns based on Line ' + '{}'.format(len(columns), header_pos)) + else: + columns = standard_cols # Key value pairs are separate by some separator provided. data = {} From a11c8413cafc5f25ef86cc045c64ed0710f98889 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 14:29:59 -0600 Subject: [PATCH 16/30] 2020 V2 data, allow split header line logic. ALSO - use the non-gap-filled density because the gap filled density files break the logic as they don't show the profile at all --- scripts/upload/add_time_series_pits_2020.py | 35 +++++++++++---------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index ac53824..a23bb40 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -25,23 +25,19 @@ def main(): db_name = 'localhost/snowex' # TODO: write script to clear out the timeseries pits # * maybe delete all pits and then add them back in - # Version 1 DOI - # https://nsidc.org/data/snex20_ts_sp/versions/1 - doi = "https://doi.org/10.5067/POT9E0FFUUD1" + # Version 2 DOI + # https://nsidc.org/data/snex20_ts_sp/versions/2 + doi = "https://doi.org/10.5067/KZ43HVLZV6G4" debug = True - # TODO: new header of - # Pit Comments - # Parameter Codes - # Point to the downloaded data from - data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.001/') + data_dir = abspath('../download/data/SNOWEX/SNEX20_TS_SP.002/') error_msg = [] # Files to ignore ignore_files = [ - "SNEX20_TS_SP_Summary_Environment_v01.csv", - "SNEX20_TS_SP_Summary_SWE_v01.csv" + "SNEX20_TS_SP_Summary_Environment_v02.csv", + "SNEX20_TS_SP_Summary_SWE_v02.csv" ] # Get all the date folders @@ -54,7 +50,7 @@ def main(): site_ids = [] # Get the unique site ids for this date folder compiled = re.compile( - r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' + r'SNEX20_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v02\.csv' ) for file_path in dt_folder_files: file_name = file_path.name @@ -88,21 +84,25 @@ def main(): str(udf), f'*_{site_id}_*perimeterDepths*.csv' )) - # all non-gapped filled_density - gap_filled_density = glob.glob(join( + # Use no-gap-filled density for the sole reason that + # Gap filled density for profiles where the scale was broken + # are just an empty file after the headers. 
We should + # Record that Nan density was collected for the profile + density_files = glob.glob(join( str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' )) # Remove the site details from the total file list to get only the profiles = list( set(filenames) - set(sites) - set(perimeter_depths) - - set(gap_filled_density) # remove gap-filled denisty + set(density_files) # remove non-gap-filled denisty ) # Submit all profiles associated with pit at a time b = UploadProfileBatch( filenames=profiles, debug=debug, doi=doi, in_timezone=tz, - db_name=db_name + db_name=db_name, + allow_split_lines=True # Logic for split header lines ) b.push() error_msg += b.errors @@ -115,6 +115,8 @@ def main(): sd.push() error_msg += sd.errors + # TODO: upload SWE file like the perimiter depths + # Submit all perimeters as point data with db_session( db_name, credentials='credentials.json' @@ -122,7 +124,8 @@ def main(): for fp in perimeter_depths: pcsv = PointDataCSV( fp, doi=doi, debug=debug, depth_is_metadata=False, - in_timezone=tz + in_timezone=tz, + allow_split_lines=True # Logic for split header lines ) pcsv.submit(session) From 90a20a5832c5a219a2e2b2b01bffa14223cf327d Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 23 Jul 2024 15:07:17 -0600 Subject: [PATCH 17/30] get rid of spaces in flags --- snowex_db/upload.py | 49 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/snowex_db/upload.py b/snowex_db/upload.py index ee342b7..ac97a08 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -25,6 +25,10 @@ LOG = logging.getLogger("snowex_db.upload") +class DataValidationError(ValueError): + pass + + class UploadProfileData: """ Class for submitting a single profile. Since layers are uploaded layer by layer this allows for submitting them @@ -50,6 +54,36 @@ def __init__(self, profile_filename, **kwargs): # Use the files creation date as the date accessed for NSIDC citation self.date_accessed = get_file_creation_date(self.filename) + def _handle_force(self, df, profile_filename): + if 'force' in df.columns: + # Convert depth from mm to cm + df['depth'] = df['depth'].div(10) + is_smp = True + # Make the data negative from snow surface + depth_fmt = 'surface_datum' + + # SMP serial number and original filename for provenance to the comment + f = basename(profile_filename) + serial_no = f.split('SMP_')[-1][1:3] + + df['comments'] = f"fname = {f}, " \ + f"serial no. = {serial_no}" + + return df + + def _handle_flags(self, df): + + if "flags" in df.columns: + # Max length of the flags column + max_len = LayerData.flags.type.length + df["flags"] = df["flags"].str.replace(" ", "") + str_len = df["flags"].str.len() + if any(str_len > max_len): + raise DataValidationError( + f"Flag column is too long" + ) + return df + def _read(self, profile_filename): """ Read in a profile file. Managing the number of lines to skip and @@ -74,19 +108,8 @@ def _read(self, profile_filename): # Special SMP specific tasks depth_fmt = 'snow_height' is_smp = False - if 'force' in df.columns: - # Convert depth from mm to cm - df['depth'] = df['depth'].div(10) - is_smp = True - # Make the data negative from snow surface - depth_fmt = 'surface_datum' - - # SMP serial number and original filename for provenance to the comment - f = basename(profile_filename) - serial_no = f.split('SMP_')[-1][1:3] - df['comments'] = f"fname = {f}, " \ - f"serial no. 
= {serial_no}" + df = self._handle_force(df, profile_filename) if not df.empty: # Standardize all depth data @@ -183,6 +206,8 @@ def build_data(self, data_name): df['comments'] = df['comments'].apply( lambda x: x.strip(' ') if isinstance(x, str) else x) + self._handle_flags(df) + return df def submit(self, session): From 94ddad031508158e604d6e7f85c6ebaf107ea824 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 24 Jul 2024 13:56:35 -0600 Subject: [PATCH 18/30] Script for 2021 pits is working --- scripts/upload/add_time_series_pits_2021.py | 29 ++++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/scripts/upload/add_time_series_pits_2021.py b/scripts/upload/add_time_series_pits_2021.py index 2faaf14..132ded6 100644 --- a/scripts/upload/add_time_series_pits_2021.py +++ b/scripts/upload/add_time_series_pits_2021.py @@ -28,6 +28,12 @@ def main(): data_dir = abspath('../download/data/SNOWEX/SNEX21_TS_SP.001/') error_msg = [] + # Files to ignore + ignore_files = [ + "SNEX21_TS_SP_Summary_Environment_v01.csv", + "SNEX21_TS_SP_Summary_SWE_v01.csv" + ] + # Get all the date folders unique_dt_olders = Path( data_dir @@ -35,13 +41,17 @@ def main(): for udf in unique_dt_olders: # get all the csvs in the folder dt_folder_files = list(udf.glob("*.csv")) - all_file_names = [f.name for f in dt_folder_files] site_ids = [] # Get the unique site ids for this date folder compiled = re.compile( r'SNEX21_TS_SP_\d{8}_\d{4}_([a-zA-Z0-9]*)_data_.*_v01\.csv' ) - for file_name in all_file_names: + for file_path in dt_folder_files: + file_name = file_path.name + if file_name in ignore_files: + print(f"Skipping {file_name}") + continue + match = compiled.match(file_name) if match: code = match.group(1) @@ -64,21 +74,25 @@ def main(): str(udf), f'*_{site_id}_*siteDetails*.csv' )) - # all non-gapped filled_density - gap_filled_density = glob.glob(join( + # Use no-gap-filled density for the sole reason that + # Gap filled density for profiles where the scale was broken + # are just an empty file after the headers. 
We should + # Record that Nan density was collected for the profile + density_files = glob.glob(join( str(udf), f'*_{site_id}_*_gapFilledDensity_*.csv' )) # Remove the site details from the total file list to get only the profiles = list( set(filenames) - set(sites) - - set(gap_filled_density) # remove gap-filled denisty + set(density_files) # remove non-gap-filled denisty ) # Submit all profiles associated with pit at a time b = UploadProfileBatch( filenames=profiles, debug=debug, doi=doi, in_timezone=tz, - db_name=db_name + db_name=db_name, + allow_split_lines=True # Logic for split header lines ) b.push() error_msg += b.errors @@ -86,7 +100,8 @@ def main(): # Upload the site details sd = UploadSiteDetailsBatch( filenames=sites, debug=debug, doi=doi, in_timezone=tz, - db_name=db_name + db_name=db_name, + allow_split_lines=True # Logic for split header lines ) sd.push() error_msg += sd.errors From dd1547fee5f6bd3f80b416c3cc3adb59eaa3cee4 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 24 Jul 2024 16:05:54 -0600 Subject: [PATCH 19/30] start working on SWE files for pits --- scripts/upload/add_pits_bulk_properties.py | 67 +++++++++++++++++++++ scripts/upload/add_time_series_pits_2020.py | 3 +- 2 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 scripts/upload/add_pits_bulk_properties.py diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_pits_bulk_properties.py new file mode 100644 index 0000000..c261427 --- /dev/null +++ b/scripts/upload/add_pits_bulk_properties.py @@ -0,0 +1,67 @@ +""" +Script to upload the Snowex Time Series pits +""" + +import glob +import re +from os.path import abspath, join +from pathlib import Path + +import pandas as pd + +from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch +from snowex_db.upload import PointDataCSV +from snowex_db import db_session + + +tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], + 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], + } + + +def main(): + """ + Add bulk SWE, Depth, Density for 2020 and 2021 timeseires pits + """ + db_name = 'localhost/test' + debug = True + + # Point to the downloaded data from + data_dir = abspath('../download/data/SNOWEX/') + error_msg = [] + + path_details = [ + { + "DOI": "https://doi.org/10.5067/KZ43HVLZV6G4", + "path": "SNEX20_TS_SP.002/2019.10.24/SNEX20_TS_SP_Summary_SWE_v02.csv" + }, + { + "DOI": "https://doi.org/10.5067/QIANJYJGRWOV", + "path": "SNEX21_TS_SP.001/2020.11.16/SNEX21_TS_SP_Summary_SWE_v01.csv" + } + ] + for info in path_details: + doi = info["DOI"] + file_path = join(data_dir, info["path"]) + # Read csv and dump new one without the extra header lines + df = pd.read_csv( + file_path, + skiprows=list(range(32)) + [33] + ) + new_name = file_path.replace(".csv", "_modified.csv") + df.to_csv(new_name, index=False) + + # Submit SWE file data as point data + with db_session( + db_name, credentials='credentials.json' + ) as (session, engine): + # TODO: tz based on points + pcsv = PointDataCSV( + new_name, doi=doi, debug=debug, depth_is_metadata=False, + in_timezone="US/Mountain", + ) + pcsv.submit(session) + + +if __name__ == '__main__': + main() diff --git a/scripts/upload/add_time_series_pits_2020.py b/scripts/upload/add_time_series_pits_2020.py index a23bb40..edb13b6 100644 --- a/scripts/upload/add_time_series_pits_2020.py +++ b/scripts/upload/add_time_series_pits_2020.py @@ -19,8 +19,7 @@ def main(): """ - Currenltly based on the preliminary downloaded zip which has not been submitted yet. 
- Folder name is SNEX20_TS_SP_preliminary_v4 + Add 2020 timeseries pits """ db_name = 'localhost/snowex' # TODO: write script to clear out the timeseries pits From afaaa5b4b480fadcf47ca0fd56af4f25a6c6166d Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Thu, 25 Jul 2024 11:44:49 -0600 Subject: [PATCH 20/30] move towards row based SRID and timezone ability --- scripts/upload/add_pits_bulk_properties.py | 16 +++++- snowex_db/interpretation.py | 1 + snowex_db/metadata.py | 19 +++++-- snowex_db/upload.py | 58 +++++++++++++++++----- 4 files changed, 75 insertions(+), 19 deletions(-) diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_pits_bulk_properties.py index c261427..4a5c6bd 100644 --- a/scripts/upload/add_pits_bulk_properties.py +++ b/scripts/upload/add_pits_bulk_properties.py @@ -49,6 +49,16 @@ def main(): skiprows=list(range(32)) + [33] ) new_name = file_path.replace(".csv", "_modified.csv") + # Filter to columns we want (density, swe, etc) + columns = [ + 'Location', 'Site', 'PitID', 'Date/Local Standard Time', 'UTM Zone', + 'Easting (m)', 'Northing (m)', 'Latitude (deg)', 'Longitude (deg)', + # 'Density A Mean (kg/m^3)', 'Density B Mean (kg/m^3)', + 'Density Mean (kg/m^3)', + # 'SWE A (mm)', 'SWE B (mm)', + 'SWE (mm)', 'HS (cm)', 'Flag' + ] + df = df.loc[:, columns] df.to_csv(new_name, index=False) # Submit SWE file data as point data @@ -57,8 +67,10 @@ def main(): ) as (session, engine): # TODO: tz based on points pcsv = PointDataCSV( - new_name, doi=doi, debug=debug, depth_is_metadata=False, - in_timezone="US/Mountain", + new_name, doi=doi, debug=debug, + depth_is_metadata=False, + row_based_crs=True, + row_based_timezone=True ) pcsv.submit(session) diff --git a/snowex_db/interpretation.py b/snowex_db/interpretation.py index d8de691..743d043 100644 --- a/snowex_db/interpretation.py +++ b/snowex_db/interpretation.py @@ -216,6 +216,7 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'): raise ValueError("We did not recieve a valid in_timezone") # Look for a single header entry containing date and time. 
+ # This would handle key of 'datetime' for k in data.keys(): kl = k.lower() if 'date' in kl and 'time' in kl: diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 387eece..17f9e88 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -326,6 +326,7 @@ class DataHeader(object): 'measurement_tool': 'instrument', 'avgdensity': 'density', 'avg_density': 'density', + 'density_mean': 'density', 'dielectric_constant': 'permittivity', 'flag': 'flags', 'hs': 'depth', @@ -374,12 +375,20 @@ def __init__(self, filename, **kwargs): self.extra_header = assign_default_kwargs( self, kwargs, self.defaults, leave=['epsg']) - # Validate that an intentionally good in timezone was given - in_timezone = kwargs.get('in_timezone') - if in_timezone is None or "local" in in_timezone.lower(): - raise ValueError("A valid in_timezone was not provided") + # Use a row based timezone + if kwargs.get("row_based_timezone", False): + if kwargs.get('in_timezone'): + raise ValueError( + "Cannot have row based and file based timezone" + ) + self.in_timezone = None else: - self.in_timezone = in_timezone + # Validate that an intentionally good in timezone was given + in_timezone = kwargs.get('in_timezone') + if in_timezone is None or "local" in in_timezone.lower(): + raise ValueError("A valid in_timezone was not provided") + else: + self.in_timezone = in_timezone self.log.info('Interpreting metadata in {}'.format(filename)) diff --git a/snowex_db/upload.py b/snowex_db/upload.py index ac97a08..3f7b75a 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -11,6 +11,7 @@ from os import makedirs, remove import boto3 import logging +from timezonefinder import TimezoneFinder from snowexsql.db import get_table_attributes from snowexsql.data import ImageData, LayerData, PointData @@ -271,11 +272,22 @@ def __init__(self, filename, **kwargs): # Assign defaults for this class self.kwargs = assign_default_kwargs(self, kwargs, self.defaults) + # Assign if details are row based (generally for the SWE files) + self._row_based_crs = self.kwargs.get("row_based_crs", False) + self._row_based_tz = self.kwargs.get("row_based_timezone", False) + if self._row_based_tz: + in_timezone = None + else: + in_timezone = kwargs['in_timezone'] + # Use the files creation date as the date accessed for NSIDC citation self.date_accessed = get_file_creation_date(filename) # NOTE: This will error if in_timezone is not provided - self.hdr = DataHeader(filename, in_timezone=kwargs['in_timezone'], **self.kwargs) + self.hdr = DataHeader( + filename, in_timezone=in_timezone, + **self.kwargs + ) self.df = self._read(filename) # Performance tracking @@ -307,9 +319,21 @@ def _read(self, filename): df['date'] = self.hdr.info['date'] df['time'] = self.hdr.info['time'] else: - # date/time was provided in the data - df = df.apply(lambda data: add_date_time_keys( - data, in_timezone=self.in_timezone), axis=1) + # date/time was provided in the + if self._row_based_tz: + # row based in timezone + df = df.apply( + lambda data: add_date_time_keys( + data, + in_timezone=TimezoneFinder().timezone_at( + lng=data['longitude'], lat=data['latitude'] + ) + ), axis=1 + ) + else: + # file based timezone + df = df.apply(lambda data: add_date_time_keys( + data, in_timezone=self.in_timezone), axis=1) # 1. 
Only submit valid columns to the DB self.log.info('Adding valid keyword arguments to metadata...') @@ -327,22 +351,33 @@ def _read(self, filename): df[k] = self.hdr.info[k] # Add geometry - df['geom'] = df.apply(lambda row: WKTElement( - 'POINT({} {})'.format( - row['easting'], - row['northing']), + if self._row_based_crs: + # EPSG at row level here (EPSG:269...) + df['geom'] = df.apply(lambda row: WKTElement( + 'POINT({} {})'.format( + row['easting'], + row['northing']), + srid=f"EPSG:269{row['utm_zone']}"), axis=1) + else: + # EPSG at the file level + df['geom'] = df.apply(lambda row: WKTElement( + 'POINT({} {})'.format( + row['easting'], + row['northing']), srid=self.hdr.info['epsg']), axis=1) - # 2. Add all kwargs that were valid for v in valid: if v in self.kwargs.keys(): df[v] = self.kwargs[v] - # Add a camera id to the description if camera is in the cols (For camera derived snow depths) + # Add a camera id to the description if camera is in the cols + # (For camera derived snow depths) if 'camera' in df.columns: self.log.info('Adding camera id to equipment column...') - df['equipment'] = df.apply(lambda row: f'camera id = {row["camera"]}', axis=1) + df['equipment'] = df.apply( + lambda row: f'camera id = {row["camera"]}', axis=1 + ) # 3. Remove columns that are not valid drops = \ @@ -384,7 +419,6 @@ def submit(self, session): df = self.build_data(pt) self.log.info('Submitting {:,} points of {} to the database...'.format( len(df.index), pt)) - for i, row in df.iterrows(): d = PointData(**row) objects.append(d) From 4376a41bd96fe89430d5d02b8e4436b8ba649613 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Thu, 25 Jul 2024 14:44:15 -0600 Subject: [PATCH 21/30] bulk swe property upload script working --- requirements.txt | 1 + scripts/upload/add_pits_bulk_properties.py | 11 +---------- snowex_db/upload.py | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0c5d68e..29e0d70 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ coloredlogs>=14.0 progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 +timezonefinder>=6.0,<7.0 insitupy==0.1.0 diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_pits_bulk_properties.py index 4a5c6bd..f2c6290 100644 --- a/scripts/upload/add_pits_bulk_properties.py +++ b/scripts/upload/add_pits_bulk_properties.py @@ -9,21 +9,15 @@ import pandas as pd -from snowex_db.batch import UploadProfileBatch, UploadSiteDetailsBatch from snowex_db.upload import PointDataCSV from snowex_db import db_session -tz_map = {'US/Pacific': ['CA', 'NV', 'WA'], - 'US/Mountain': ['CO', 'ID', 'NM', 'UT', 'MT'], - } - - def main(): """ Add bulk SWE, Depth, Density for 2020 and 2021 timeseires pits """ - db_name = 'localhost/test' + db_name = 'localhost/snowex' debug = True # Point to the downloaded data from @@ -53,9 +47,7 @@ def main(): columns = [ 'Location', 'Site', 'PitID', 'Date/Local Standard Time', 'UTM Zone', 'Easting (m)', 'Northing (m)', 'Latitude (deg)', 'Longitude (deg)', - # 'Density A Mean (kg/m^3)', 'Density B Mean (kg/m^3)', 'Density Mean (kg/m^3)', - # 'SWE A (mm)', 'SWE B (mm)', 'SWE (mm)', 'HS (cm)', 'Flag' ] df = df.loc[:, columns] @@ -65,7 +57,6 @@ def main(): with db_session( db_name, credentials='credentials.json' ) as (session, engine): - # TODO: tz based on points pcsv = PointDataCSV( new_name, doi=doi, debug=debug, depth_is_metadata=False, diff --git a/snowex_db/upload.py b/snowex_db/upload.py index 3f7b75a..43097c0 100644 --- a/snowex_db/upload.py +++ 
b/snowex_db/upload.py @@ -357,7 +357,7 @@ def _read(self, filename): 'POINT({} {})'.format( row['easting'], row['northing']), - srid=f"EPSG:269{row['utm_zone']}"), axis=1) + srid=int(row['epsg'])), axis=1) else: # EPSG at the file level df['geom'] = df.apply(lambda row: WKTElement( From 295493ae626a7714e029c150fdc0f3af0aff9946 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 29 Jul 2024 17:05:17 -0600 Subject: [PATCH 22/30] start script to add in met timeseries data --- scripts/download/nsidc_sources.txt | 1 + scripts/upload/add_met_timeseries.py | 117 ++++++++++++++++++ ...=> add_timeseries_pits_bulk_properties.py} | 0 snowex_db/metadata.py | 14 ++- 4 files changed, 126 insertions(+), 6 deletions(-) create mode 100644 scripts/upload/add_met_timeseries.py rename scripts/upload/{add_pits_bulk_properties.py => add_timeseries_pits_bulk_properties.py} (100%) diff --git a/scripts/download/nsidc_sources.txt b/scripts/download/nsidc_sources.txt index 43639ea..3b67c5b 100644 --- a/scripts/download/nsidc_sources.txt +++ b/scripts/download/nsidc_sources.txt @@ -8,3 +8,4 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_G https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD_TLI.001/2019.09.29/SNEX20_SD_TLI_clean.csv https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_TS_SP.002/ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX21_TS_SP.001/ +https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX_Met.001/ diff --git a/scripts/upload/add_met_timeseries.py b/scripts/upload/add_met_timeseries.py new file mode 100644 index 0000000..1b19e29 --- /dev/null +++ b/scripts/upload/add_met_timeseries.py @@ -0,0 +1,117 @@ +""" +Uploads SnowEx temporary met stations to the database + +Source: https://nsidc.org/data/snex_met/versions/1 +User guide: https://nsidc.org/sites/default/files/documents/user-guide/snex_met-v001-userguide.pdf + +1. Data must be downloaded via sh ../download/download_nsidc.sh +2A. python run.py # To run all together all at once +2B. 
python add_met_timeseries.py # To run individually +""" + +import glob +import time +from os.path import abspath, join + +import pandas as pd +from snowexsql.db import get_db +from snowex_db.upload import * +from snowex_db import db_session + + +def main(): + # Site name + start = time.time() + site_name = 'Grand Mesa' + timezone = 'MST' + + # Read in the Grand Mesa Snow Depths Data + base = abspath(join('../download/data/SNOWEX/SNEX_Met.001/')) + + # Start the Database + db_name = 'localhost/test' + + csvs = glob.glob(join(base, '*/*.csv')) + + # Location mapping from the user guide + location_mapping = { + "GMSP": [39.05084, -108.06144], + "LSOS": [39.05225, -108.09792], + "ME": [39.10358, -107.88383], + "MM": [39.03954, -107.94174], + "MW": [39.03388, -108.21399], + } + + variable_unit_map = { + "RH_10ft": "percent", + "RH_20ft": "percent", + "BP_kPa_Avg": "kPa", + "AirTC_20ft_Avg": "degrees Celcius", + "AirTC_10ft_Avg": "degrees Celcius", + "WSms_20ft_Avg": "m/s", + "WSms_10ft_Avg": "m/s", + "WindDir_10ft_D1_WVT": "degrees", + "WindDir_20ft_D1_WVT": "degrees", + "SUp_Avg": "W/m^2", + "SDn_Avg": "W/m^2", + "LUpCo_Avg": "W/m^2", + "LDnCo_Avg": "W/m^2", + "SM_5cm_Avg": None, + "SM_20cm_Avg": None, + "SM_50cm_Avg": None, + "TC_5cm_Avg": "degrees Celcius", + "TC_20cm_Avg": "degrees Celcius", + "TC_50cm_Avg": "degrees Celcius", + # "DistanceSensToGnd(m)", + "SnowDepthFilter(m)": "m" + } + + errors = 0 + with db_session( + db_name, credentials='credentials.json' + ) as (session, engine): + + for f in csvs: + # find the point relative to the file + point_id = f.split("Met_")[-1].split("_final")[0] + # get location info from the point id + lat, lon = location_mapping[point_id] + + # Read in the file + df = pd.read_csv(f) + # add location info + df["latitude"] = [lat] * len(df) + df["longitude"] = [lon] * len(df) + df = df.set_index("TIMESTAMP") + # TODO: what do we do with site_id? is MM the site id? + # we can add it as "site" to the df if it is + df["site"] = [point_id] * len(df) + + # TODO: how do we handle to different heights? + # use layer data? 
+ + # Split variables into their own files + for v, unit in variable_unit_map.items(): + df_cut = df.loc[ + :, [v, "latitude", "longitude", "site"] + ] + + new_f = f.replace(".csv", f"local_mod_{v}.csv") + df_cut.to_csv(new_f, index_label="datetime") + csv = PointDataCSV( + new_f, + depth_is_metadata=False, + units=unit, + site_name=site_name, + in_timezone=timezone, + epsg=26912, + doi="https://doi.org/10.5067/497NQVJ0CBEX") + + csv.submit(session) + errors += len(csv.errors) + + return errors + + +if __name__ == '__main__': + main() diff --git a/scripts/upload/add_pits_bulk_properties.py b/scripts/upload/add_timeseries_pits_bulk_properties.py similarity index 100% rename from scripts/upload/add_pits_bulk_properties.py rename to scripts/upload/add_timeseries_pits_bulk_properties.py diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 17f9e88..a1ea4d0 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -338,11 +338,13 @@ class DataHeader(object): } # Known possible profile types anything not in here will throw an error - available_data_names = ['density', 'permittivity', 'lwc_vol', 'temperature', - 'force', 'reflectance', 'sample_signal', - 'specific_surface_area', 'equivalent_diameter', - 'grain_size', 'hand_hardness', 'grain_type', - 'manual_wetness', 'two_way_travel', 'depth', 'swe'] + available_data_names = [ + 'density', 'permittivity', 'lwc_vol', 'temperature', + 'force', 'reflectance', 'sample_signal', + 'specific_surface_area', 'equivalent_diameter', + 'grain_size', 'hand_hardness', 'grain_type', + 'manual_wetness', 'two_way_travel', 'depth', 'swe', + ] # Defaults to keywords arguments defaults = { @@ -406,7 +408,7 @@ def __init__(self, filename, **kwargs): def submit(self, session): """ - Submit meta data to the database as site info, Do not use on profile + Submit metadata to the database as site info, Do not use on profile headers. Only use on site_details files. 
Args: From 55b6326f767894acb793b3e274191238e1301a0e Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 30 Jul 2024 15:47:34 -0600 Subject: [PATCH 23/30] Script working to upload met data for GM to the database --- requirements.txt | 1 + scripts/upload/add_met_timeseries.py | 127 +++++++++++++++++---------- snowex_db/metadata.py | 18 +++- 3 files changed, 99 insertions(+), 47 deletions(-) diff --git a/requirements.txt b/requirements.txt index 29e0d70..8238ad8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ rasterio>=1.1.5 boto3>=1.23.7,<1.24 timezonefinder>=6.0,<7.0 insitupy==0.1.0 +metloom==0.6.1 diff --git a/scripts/upload/add_met_timeseries.py b/scripts/upload/add_met_timeseries.py index 1b19e29..b07e1cc 100644 --- a/scripts/upload/add_met_timeseries.py +++ b/scripts/upload/add_met_timeseries.py @@ -12,6 +12,7 @@ import glob import time from os.path import abspath, join +from metloom.pointdata.snowex import SnowExMetInfo import pandas as pd from snowexsql.db import get_db @@ -31,39 +32,77 @@ def main(): # Start the Database db_name = 'localhost/test' - csvs = glob.glob(join(base, '*/*.csv')) - - # Location mapping from the user guide - location_mapping = { - "GMSP": [39.05084, -108.06144], - "LSOS": [39.05225, -108.09792], - "ME": [39.10358, -107.88383], - "MM": [39.03954, -107.94174], - "MW": [39.03388, -108.21399], - } - + # Variables we will use variable_unit_map = { - "RH_10ft": "percent", - "RH_20ft": "percent", - "BP_kPa_Avg": "kPa", - "AirTC_20ft_Avg": "degrees Celcius", - "AirTC_10ft_Avg": "degrees Celcius", - "WSms_20ft_Avg": "m/s", - "WSms_10ft_Avg": "m/s", - "WindDir_10ft_D1_WVT": "degrees", - "WindDir_20ft_D1_WVT": "degrees", - "SUp_Avg": "W/m^2", - "SDn_Avg": "W/m^2", - "LUpCo_Avg": "W/m^2", - "LDnCo_Avg": "W/m^2", - "SM_5cm_Avg": None, - "SM_20cm_Avg": None, - "SM_50cm_Avg": None, - "TC_5cm_Avg": "degrees Celcius", - "TC_20cm_Avg": "degrees Celcius", - "TC_50cm_Avg": "degrees Celcius", + "RH_10ft": { + "units": "percent", + "notes": "Relative humidity measured at 10 ft tower level", + "instrument": "Campbell Scientific HC2S3" + }, + # "RH_20ft": "percent", + "BP_kPa_Avg": { + "units": "kPa", + "notes": "Barometric pressure", + "instrument": "Campbell Scientific CS106", + }, + # "AirTC_20ft_Avg": "degrees Celcius", + "AirTC_10ft_Avg": { + "units": "degrees Celcius", + "notes": "Air temperature measured at 10 ft tower level", + "instrument": "Campbell Scientific HC2S3" + }, + # "WSms_20ft_Avg": "m/s", + "WSms_10ft_Avg": { + "units": "m/s", + "notes": "Vector mean wind speed measured at 10 ft tower level", + "instrument": "R.M. Young 05103", + }, + "WindDir_10ft_D1_WVT": { + "units": "degrees", + "notes": "Vector mean wind direction measured at 10 ft tower level", + "instrument": "R.M. 
Young 05103", + }, + # "WindDir_20ft_D1_WVT": "degrees", + "SUp_Avg": { + "units": "W/m^2", + "notes": "Shortwave radiation measured with upward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + "SDn_Avg": { + "units": "W/m^2", + "notes": "Shortwave radiation measured with downward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + "LUpCo_Avg": { + "units": "W/m^2", + "notes": "Longwave radiation measured with upward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + "LDnCo_Avg": { + "units": "W/m^2", + "notes": "Longwave radiation measured with downward-facing sensor", + "instrument": "Kipp and Zonnen CNR4", + }, + # "SM_5cm_Avg": None, + "SM_20cm_Avg": { + "units": None, + "notes": "Soil moisture measured at 10 cm below the soil", + "instrument": "Stevens Water Hydraprobe II", + }, + # "SM_50cm_Avg": None, + # "TC_5cm_Avg": "degrees Celcius", + "TC_20cm_Avg": { + "units": "degrees Celcius", + "notes": "Soil temperature measured at 10 cm below the soil", + "instrument": "Stevens Water Hydraprobe II", + }, + # "TC_50cm_Avg": "degrees Celcius", # "DistanceSensToGnd(m)", - "SnowDepthFilter(m)": "m" + "SnowDepthFilter(m)": { + "units": "m", + "notes": "Temperature corrected, derived snow surface height (filtered)", + "instrument": "Campbell Scientific SR50A", + }, } errors = 0 @@ -71,30 +110,26 @@ def main(): db_name, credentials='credentials.json' ) as (session, engine): - for f in csvs: - # find the point relative to the file - point_id = f.split("Met_")[-1].split("_final")[0] - # get location info from the point id - lat, lon = location_mapping[point_id] - + for stn_obj in SnowExMetInfo: + f = join(base, stn_obj.path) # Read in the file df = pd.read_csv(f) # add location info - df["latitude"] = [lat] * len(df) - df["longitude"] = [lon] * len(df) + df["latitude"] = [stn_obj.latitude] * len(df) + df["longitude"] = [stn_obj.longitude] * len(df) df = df.set_index("TIMESTAMP") - # TODO: what do we do with site_id? is MM the site id? - # we can add it as "site" to the df if it is - df["site"] = [point_id] * len(df) - - # TODO: how do we handle to different heights? - # use layer data? + # SITE ID - use station id + df["site"] = [stn_obj.station_id] * len(df) + df["observer"] = ["P. 
Houser"] * len(df) # Split variables into their own files - for v, unit in variable_unit_map.items(): + for v, info in variable_unit_map.items(): + unit = info["units"] + df_cut = df.loc[ :, [v, "latitude", "longitude", "site"] ] + df_cut["instrument"] = [info["instrument"]] * len(df_cut) new_f = f.replace(".csv", f"local_mod_{v}.csv") df_cut.to_csv(new_f, index_label="datetime") diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index a1ea4d0..8b51906 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -334,7 +334,19 @@ class DataHeader(object): 'depth_m': 'depth', 'date_dd_mmm_yy': 'date', 'time_gmt': 'time', - 'elev_m': 'elevation' + 'elev_m': 'elevation', + 'rh_10ft': 'relative_humidity_10ft', + 'bp_kpa_avg': 'barometric_pressure', + 'airtc_10ft_avg': 'air_temperature_10ft', + 'wsms_10ft_avg': 'wind_speed_10ft', + 'winddir_10ft_d1_wvt': 'wind_direction_10ft', + 'sup_avg': 'incoming_shortwave', + 'sdn_avg': 'outgoing_shortwave', + 'lupco_avg': 'incoming_longwave', + 'ldnco_avg': 'outgoing_longwave', + 'soil_moisture_20cm': 'soil_moisture_20cm', + 'soil_temperature_20cm': 'soil_temperature_20cm', + 'snowdepthfilter(m)': 'depth' } # Known possible profile types anything not in here will throw an error @@ -344,6 +356,10 @@ class DataHeader(object): 'specific_surface_area', 'equivalent_diameter', 'grain_size', 'hand_hardness', 'grain_type', 'manual_wetness', 'two_way_travel', 'depth', 'swe', + 'relative_humidity_10ft', 'barometric_pressure', + 'air_temperature_10ft', 'wind_speed_10ft', 'wind_direction_10ft', + 'incoming_shortwave', 'outgoing_shortwave', 'incoming_longwave', + 'outgoing_longwave', 'soil_moisture_20cm', 'soil_temperature_20cm' ] # Defaults to keywords arguments From 5eb92d4d2120ced5df4cc01751854f00866e8294 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 30 Jul 2024 16:10:17 -0600 Subject: [PATCH 24/30] Issue #20 - bump insitupy for new variables --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8238ad8..4a652f3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,5 @@ progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 timezonefinder>=6.0,<7.0 -insitupy==0.1.0 +insitupy==0.1.1 metloom==0.6.1 From 690873fe8686e4cfbce26f4f8cfd8ef7a2e7a63e Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Tue, 30 Jul 2024 17:00:34 -0600 Subject: [PATCH 25/30] adjustments to variable mapping --- snowex_db/metadata.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 8b51906..49d0141 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -337,15 +337,15 @@ class DataHeader(object): 'elev_m': 'elevation', 'rh_10ft': 'relative_humidity_10ft', 'bp_kpa_avg': 'barometric_pressure', - 'airtc_10ft_avg': 'air_temperature_10ft', + 'airtc_10ft_avg': 'air_temp_10ft', 'wsms_10ft_avg': 'wind_speed_10ft', 'winddir_10ft_d1_wvt': 'wind_direction_10ft', 'sup_avg': 'incoming_shortwave', 'sdn_avg': 'outgoing_shortwave', 'lupco_avg': 'incoming_longwave', 'ldnco_avg': 'outgoing_longwave', - 'soil_moisture_20cm': 'soil_moisture_20cm', - 'soil_temperature_20cm': 'soil_temperature_20cm', + 'sm_20cm_avg': 'soil_moisture_20cm', + 'tc_20cm_avg': 'soil_temp_20cm', 'snowdepthfilter(m)': 'depth' } @@ -357,9 +357,9 @@ class DataHeader(object): 'grain_size', 'hand_hardness', 'grain_type', 'manual_wetness', 'two_way_travel', 'depth', 'swe', 'relative_humidity_10ft', 'barometric_pressure', - 'air_temperature_10ft', 
'wind_speed_10ft', 'wind_direction_10ft', + 'air_temp_10ft', 'wind_speed_10ft', 'wind_direction_10ft', 'incoming_shortwave', 'outgoing_shortwave', 'incoming_longwave', - 'outgoing_longwave', 'soil_moisture_20cm', 'soil_temperature_20cm' + 'outgoing_longwave', 'soil_moisture_20cm', 'soil_temp_20cm' ] # Defaults to keywords arguments @@ -389,6 +389,7 @@ def __init__(self, filename, **kwargs): kwargs: keyword values to pass to the database as metadata """ self.log = get_logger(__name__) + self._fname = filename self.extra_header = assign_default_kwargs( self, kwargs, self.defaults, leave=['epsg']) From 2d38b0a9cc46b616a9f35e2ed1e0bab6f1435f29 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 31 Jul 2024 10:58:58 -0600 Subject: [PATCH 26/30] bump insitupy for more depth mappings --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 4a652f3..bece44e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,5 +8,5 @@ progressbar2>=3.51.3 rasterio>=1.1.5 boto3>=1.23.7,<1.24 timezonefinder>=6.0,<7.0 -insitupy==0.1.1 +insitupy==0.1.2 metloom==0.6.1 From 356bfb35bdc71d24c2e18508d6cdbb441f07a144 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Wed, 31 Jul 2024 11:08:10 -0600 Subject: [PATCH 27/30] Remove Python 3.7 compatability --- .github/workflows/main.yml | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c457759..2667915 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.8, 3.9, "3.10"] services: diff --git a/setup.py b/setup.py index 0713dd3..44171fc 100644 --- a/setup.py +++ b/setup.py @@ -18,14 +18,14 @@ setup( author="Micah Johnson", - python_requires='>=3.7', + python_requires='>=3.8', classifiers=[ 'Development Status :: 2 - Pre-Alpha', 'Intended Audience :: Developers', 'Natural Language :: English', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', ], description="Software for building and managing a SnowEx PostGIS database", From 47ff2b5bfb11bf924a3787ec997323422b49e341 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 5 Aug 2024 14:03:42 -0600 Subject: [PATCH 28/30] fixing reqs in build --- docs/requirements.txt | 1 + requirements_dev.txt | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 4d7fb78..d86dd2f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -6,3 +6,4 @@ pandoc==1.0.2 sphinxcontrib-apidoc==0.3.0 ipython==7.31.1 MarkupSafe<2.1.0 +jupyterlab==2.2.10 diff --git a/requirements_dev.txt b/requirements_dev.txt index faafada..b4b33eb 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -8,5 +8,4 @@ coverage==4.5.4 twine==1.14.0 pytest==6.2.3 pytest-runner==5.1 -jupyterlab==2.2.10 moto==3.1.11 From bba3285eba32ed756ebfc4c15fbb44380bde6f8c Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Mon, 5 Aug 2024 14:40:18 -0600 Subject: [PATCH 29/30] Fixing tests and build. 
SMP profile depths were not inverted --- snowex_db/metadata.py | 103 +++++++++++++++++++++++++++++++++++++-- snowex_db/upload.py | 9 +++- tests/test_batch.py | 5 +- tests/test_projection.py | 2 +- 4 files changed, 111 insertions(+), 8 deletions(-) diff --git a/snowex_db/metadata.py b/snowex_db/metadata.py index 49d0141..c1a812e 100644 --- a/snowex_db/metadata.py +++ b/snowex_db/metadata.py @@ -6,6 +6,8 @@ from os.path import basename import pandas as pd from insitupy.campaigns.campaign import SnowExMetadataParser +from insitupy.campaigns.variables import SnowExProfileVariables, \ + MeasurementDescription from snowexsql.db import get_table_attributes from snowexsql.data import SiteData @@ -156,9 +158,14 @@ def _read(self, filename): str_cols = remap_data_names(str_cols, DataHeader.rename) dtype = {k: str for k in str_cols} - df = pd.read_csv(filename, header=header_pos, names=str_cols, - usecols=range(n_cols), encoding='latin', - parse_dates=[0], dtype=dtype) + df = pd.read_csv( + filename, header=header_pos, names=str_cols, + usecols=range(n_cols), encoding='latin', + # parse_dates=[0], + dtype=dtype + ) + # WHY IS THIS NEEDED? + df["date"] = pd.to_datetime(df["date"]) # Insure all values are 4 digits. Seems like some were not by accident df['fname_sufix'] = df['fname_sufix'].apply(lambda v: v.zfill(4)) @@ -264,6 +271,94 @@ def get_metadata(self, smp_file): return meta.iloc[0].to_dict() +class ExtendedSnowExProfileVariables(SnowExProfileVariables): + """ + Extend variables to add a few relevant ones + """ + DEPTH = MeasurementDescription( + "depth", "top or center depth of measurement", + [ + "depth", "top", "sample_top_height", "hs", + "depth_m", 'snowdepthfilter(m)', 'snowdepthfilter', + 'height' + ], True + ) + PERMITTIVITY = MeasurementDescription( + "permittivity", "Permittivity", + ["permittivity_a", "permittivity_b", "permittivity", + 'dielectric_constant', 'dielectric_constant_a', + 'dielectric_constant_b'] + ) + IGNORE = MeasurementDescription( + "ignore", "Ignore this", + ["original_index", 'id', 'freq_mhz', 'camera', 'avgvelocity'] + ) + SAMPLE_SIGNAL = MeasurementDescription( + 'sample_signal', "Sample Signal", + ['sample_signal'] + ) + FORCE = MeasurementDescription( + 'force', "Force", ["force"] + ) + REFLECTANCE = MeasurementDescription( + 'reflectance', "Reflectance", ['reflectance'] + ) + SSA = MeasurementDescription( + 'specific_surface_area', "Specific Surface Area", + ['specific_surface_area'] + ) + DATETIME = MeasurementDescription( + 'datetime', "Combined date and time", + ["Date/Local Standard Time", "date/local_standard_time", "datetime", + "date&time"], + True + ) + DATE = MeasurementDescription( + 'date', "Measurement Date (only date column)", + ['date_dd_mmm_yy', 'date'] + ) + TIME = MeasurementDescription( + 'time', "Measurement time", + ['time_gmt', 'time'] + ) + UTCYEAR = MeasurementDescription( + 'utcyear', "UTC Year", ['utcyear'] + ) + UTCDOY = MeasurementDescription( + 'utcdoy', "UTC day of year", ['utcdoy'] + ) + UTCTOD = MeasurementDescription( + 'utctod', 'UTC Time of Day', ['utctod'] + ) + ELEVATION = MeasurementDescription( + 'elevation', "Elevation", + ['elev_m', 'elevation'] + ) + EQUIPMENT = MeasurementDescription( + 'equipment', "Equipment", + ['equipment'] + ) + VERSION_NUMBER = MeasurementDescription( + 'version_number', "Version Number", + ['version_number'] + ) + NORTHING = MeasurementDescription( + 'northing', "UTM Northing", + ['northing', 'utm_wgs84_northing'] + ) + EASTING = MeasurementDescription( + 'easting', "UTM Easting", + ['easting', 
'utm_wgs84_easting'] + ) + + +class ExtendedSnowExMetadataParser(SnowExMetadataParser): + """ + Extend the parser to update the extended varaibles + """ + VARIABLES_CLASS = ExtendedSnowExProfileVariables + + class DataHeader(object): """ Class for managing information stored in files headers about a snow pit @@ -532,7 +627,7 @@ def _read(self, filename): read_csv """ - parser = SnowExMetadataParser( + parser = ExtendedSnowExMetadataParser( filename, timezone=self.in_timezone, header_sep=self.header_sep, allow_split_lines=self.allow_split_lines diff --git a/snowex_db/upload.py b/snowex_db/upload.py index 43097c0..b8315f0 100644 --- a/snowex_db/upload.py +++ b/snowex_db/upload.py @@ -110,7 +110,11 @@ def _read(self, profile_filename): depth_fmt = 'snow_height' is_smp = False - df = self._handle_force(df, profile_filename) + if 'force' in df.columns: + df = self._handle_force(df, profile_filename) + is_smp = True + # Make the data negative from snow surface + depth_fmt = 'surface_datum' if not df.empty: # Standardize all depth data @@ -173,7 +177,8 @@ def build_data(self, data_name): # Assign all meta data to every entry to the data frame for k, v in self.hdr.info.items(): - df[k] = v + if not pd.isna(v): + df[k] = v df['type'] = data_name df['date_accessed'] = self.date_accessed diff --git a/tests/test_batch.py b/tests/test_batch.py index 67352f9..38afaea 100644 --- a/tests/test_batch.py +++ b/tests/test_batch.py @@ -121,7 +121,10 @@ class TestUploadSMPBatch(TableTestBase): Test whether we can assign meta info from an smp log to 2 profiles """ args = [['S19M1013_5S21_20200201.CSV', 'S06M0874_2N12_20200131.CSV']] - kwargs = {'in_timezone': 'UTC', 'smp_log_f': 'smp_log.csv', 'units': 'Newtons'} + kwargs = { + 'in_timezone': 'UTC', + 'smp_log_f': 'smp_log.csv', + 'units': 'Newtons'} UploaderClass = UploadProfileBatch TableClass = LayerData attribute = 'depth' diff --git a/tests/test_projection.py b/tests/test_projection.py index 0512485..ea9863b 100644 --- a/tests/test_projection.py +++ b/tests/test_projection.py @@ -4,7 +4,7 @@ import pytest from geoalchemy2.shape import to_shape -from geoalchemy2.types import WKTElement +from geoalchemy2.elements import WKTElement from numpy.testing import assert_almost_equal from rasterio.crs import CRS From 620a18f0f5cb0194de78a591a21d7da62316f238 Mon Sep 17 00:00:00 2001 From: Micah Sandusky Date: Thu, 15 Aug 2024 10:53:16 -0600 Subject: [PATCH 30/30] Repeat script here --- .../add_timeseries_pits_bulk_properties.py | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 scripts/upload/add_timeseries_pits_bulk_properties.py diff --git a/scripts/upload/add_timeseries_pits_bulk_properties.py b/scripts/upload/add_timeseries_pits_bulk_properties.py deleted file mode 100644 index f2c6290..0000000 --- a/scripts/upload/add_timeseries_pits_bulk_properties.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Script to upload the Snowex Time Series pits -""" - -import glob -import re -from os.path import abspath, join -from pathlib import Path - -import pandas as pd - -from snowex_db.upload import PointDataCSV -from snowex_db import db_session - - -def main(): - """ - Add bulk SWE, Depth, Density for 2020 and 2021 timeseires pits - """ - db_name = 'localhost/snowex' - debug = True - - # Point to the downloaded data from - data_dir = abspath('../download/data/SNOWEX/') - error_msg = [] - - path_details = [ - { - "DOI": "https://doi.org/10.5067/KZ43HVLZV6G4", - "path": "SNEX20_TS_SP.002/2019.10.24/SNEX20_TS_SP_Summary_SWE_v02.csv" - }, - { - "DOI": 
"https://doi.org/10.5067/QIANJYJGRWOV", - "path": "SNEX21_TS_SP.001/2020.11.16/SNEX21_TS_SP_Summary_SWE_v01.csv" - } - ] - for info in path_details: - doi = info["DOI"] - file_path = join(data_dir, info["path"]) - # Read csv and dump new one without the extra header lines - df = pd.read_csv( - file_path, - skiprows=list(range(32)) + [33] - ) - new_name = file_path.replace(".csv", "_modified.csv") - # Filter to columns we want (density, swe, etc) - columns = [ - 'Location', 'Site', 'PitID', 'Date/Local Standard Time', 'UTM Zone', - 'Easting (m)', 'Northing (m)', 'Latitude (deg)', 'Longitude (deg)', - 'Density Mean (kg/m^3)', - 'SWE (mm)', 'HS (cm)', 'Flag' - ] - df = df.loc[:, columns] - df.to_csv(new_name, index=False) - - # Submit SWE file data as point data - with db_session( - db_name, credentials='credentials.json' - ) as (session, engine): - pcsv = PointDataCSV( - new_name, doi=doi, debug=debug, - depth_is_metadata=False, - row_based_crs=True, - row_based_timezone=True - ) - pcsv.submit(session) - - -if __name__ == '__main__': - main()