
Commit

Merge pull request #14 from SnowEx/add_more_gpr_datasets_#2
Added CSU and UNM gpr datasets, fixed #2
micahjohnson150 authored Jul 7, 2022
2 parents 538de45 + 82c655a commit 0578b83
Showing 10 changed files with 202 additions and 14 deletions.
2 changes: 2 additions & 0 deletions scripts/download/nsidc_sources.txt
@@ -3,3 +3,5 @@ https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_BSU_GPR.001/
https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_SP.001/
https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SMP.001/
https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_SD.001/
https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv
https://n5eil01u.ecs.nsidc.org/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv
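Both new entries point straight at CSV files on NSIDC, which sits behind Earthdata Login. As a minimal sketch (this is not the repository's download_nsidc.sh; it assumes an Earthdata entry for urs.earthdata.nasa.gov in ~/.netrc), one of the files could be fetched with requests:

import requests

URL = ("https://n5eil01u.ecs.nsidc.org/SNOWEX/"
       "SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv")

with requests.Session() as session:
    # requests picks up Earthdata credentials from ~/.netrc and follows the
    # redirect through urs.earthdata.nasa.gov
    resp = session.get(URL, allow_redirects=True)
    resp.raise_for_status()
    with open("SNEX20_GM_CSU_GPR_1GHz_v01.csv", "wb") as fp:
        fp.write(resp.content)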
File renamed without changes.
57 changes: 57 additions & 0 deletions scripts/upload/add_csu_gpr.py
@@ -0,0 +1,57 @@
"""
Read in the SnowEx 2020 Colorado State University (CSU) GPR data. Upload SWE,
two-way travel time, and depth to the database.
1. Data must be downloaded via sh ../download/download_nsidc.sh
2A. python run.py  # To run all uploads at once
2B. python add_csu_gpr.py  # To run this script individually
"""

import time
from os.path import abspath, expanduser, join

import pandas as pd

from snowexsql.db import get_db
from snowex_db.upload import *


def main():
file = '../download/data/SNOWEX/SNEX20_GM_CSU_GPR.001/2020.02.06/SNEX20_GM_CSU_GPR_1GHz_v01.csv'

kwargs = {
# Keyword argument to upload depth measurements
'depth_is_metadata': False,

# Constant Metadata for the GPR data
'site_name': 'Grand Mesa',
'observers': 'Randall Bonnell',
'instrument': 'pulse EKKO Pro multi-polarization 1 GHz GPR',
'in_timezone': 'UTC',
'out_timezone': 'UTC',
'doi': 'https://doi.org/10.5067/S5EGFLCIAB18',
'epsg': 26912
}

# Expand the user path and make it absolute
file = abspath(expanduser(file))

# Grab a db connection to a local db named snowex
db_name = 'localhost/snowex'
engine, session = get_db(db_name, credentials='./credentials.json')

# Instantiate the point uploader
csv = PointDataCSV(file, **kwargs)
# Push it to the database
csv.submit(session)

# Close out the session with the DB
session.close()

# Return the number of errors so run.py can report it
return len(csv.errors)


if __name__ == '__main__':
main()
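After the script runs, a quick count against the local database confirms the points landed. This is only a sketch: the table and column names (points, doi) are assumptions about the snowexsql schema, not taken from this commit.

from sqlalchemy import text
from snowexsql.db import get_db

engine, session = get_db('localhost/snowex', credentials='./credentials.json')
# Count the records carrying the CSU GPR DOI used in the upload above
n = session.execute(text(
    "SELECT count(*) FROM points "
    "WHERE doi = 'https://doi.org/10.5067/S5EGFLCIAB18'")).scalar()
print(f'{n:,} CSU GPR points in the database')
session.close()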
75 changes: 75 additions & 0 deletions scripts/upload/add_unm_gpr.py
@@ -0,0 +1,75 @@
"""
Read in the SnowEx 2020 University of New Mexico (UNM) GPR data. Upload SWE,
two-way travel time, and depth to the database.
1. Data must be downloaded via sh ../download/download_nsidc.sh
2A. python run.py  # To run all uploads at once
2B. python add_unm_gpr.py  # To run this script individually
"""

import time
from os.path import abspath, expanduser, join

import pandas as pd

from snowexsql.db import get_db
from snowex_db.upload import *


def main():
filename = '../download/data/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv'

kwargs = {
# Keyword argument to upload depth measurements
'depth_is_metadata': False,

# Constant Metadata for the GPR data
'site_name': 'Grand Mesa',
'observers': 'Ryan Webb',
'instrument': None, # See loop below
'in_timezone': 'UTC',
'out_timezone': 'UTC',
'doi': 'https://doi.org/10.5067/WE9GI1GVMQF6',
'epsg': 26912
}

# Expand the user path and make it absolute
filename = abspath(expanduser(filename))

# Grab a db connection to a local db named snowex
db_name = 'localhost/snowex'
engine, session = get_db(db_name, credentials='./credentials.json')

# Read in for management of instruments
df_raw = pd.read_csv(filename)
low_freq = df_raw['FREQ_MHz'] == 800
hi_freq = df_raw['FREQ_MHz'] == 1600

# Instantiate the point uploader
csv = PointDataCSV(filename, **kwargs)

# Convert depth to centimeters
csv.log.info('Converting depth to centimeters...')
csv.df['depth'] = csv.df['depth'].mul(100)
df_original = csv.df.copy()

# Loop over the two instruments in the file and separate them into two submissions
for hz, ind in [(800, low_freq), (1600, hi_freq)]:
instrument = f'Mala {hz} MHz GPR'
csv.log.info(f'Isolating {instrument} data for upload.')
csv.df = df_original[ind].copy()
# Change the instrument.
csv.df['instrument'] = instrument
# Push it to the database
csv.submit(session)

# Close out the session with the DB
session.close()

# Return the number of errors so run.py can report it
return len(csv.errors)


if __name__ == '__main__':
main()
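The script submits the 800 MHz and 1600 MHz rows separately so each carries the right instrument name. As a standalone illustration of the same split using pandas directly on the downloaded CSV (outside the uploader):

import pandas as pd

df = pd.read_csv('../download/data/SNOWEX/SNEX20_UNM_GPR.001/2020.01.28/SNEX20_UNM_GPR.csv')

for hz in (800, 1600):
    # Boolean mask on the raw frequency column, mirroring low_freq / hi_freq above
    subset = df[df['FREQ_MHz'] == hz]
    print(f'Mala {hz} MHz GPR: {len(subset):,} rows')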
24 changes: 20 additions & 4 deletions snowex_db/interpretation.py
@@ -12,6 +12,7 @@
import pytz

from .utilities import get_logger
from .string_management import parse_none


def is_point_data(columns):
@@ -201,7 +202,7 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'):
else:
raise ValueError("We did not recieve a valid in_timezone")

# Look for a single header entry for date and time.
# Look for a single header entry containing date and time.
for k in data.keys():
kl = k.lower()
if 'date' in kl and 'time' in kl:
@@ -211,9 +212,18 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'):

# If we didn't find date/time combined.
if d is None:
# Handle SMP data dates and times
# Handle data dates and times
if 'date' in keys and 'time' in keys:
dstr = ' '.join([str(data['date']), str(data['time'])])
# Assume MMDDYY format
if len(data['date']) == 6:
dt = data['date']
# Reformat as YYYY-MM-DD
data['date'] = f'20{dt[-2:]}-{dt[0:2]}-{dt[2:4]}'
# Allow for nan time
data['time'] = parse_none(data['time'])

dstr = ' '.join([str(data[k]) for k in ['date', 'time']
if data[k] is not None])
d = pd.to_datetime(dstr)

elif 'date' in keys:
@@ -260,7 +270,13 @@ def add_date_time_keys(data, in_timezone=None, out_timezone='UTC'):
d.replace(tzinfo=out_tz)

data['date'] = d.date()
data['time'] = d.timetz()

# Don't assign a time when the original time was NaN or None
if 'time' not in data.keys():
data['time'] = d.timetz()
else:
if data['time'] is not None:
data['time'] = d.timetz()

return data

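The new branch above rewrites compact MMDDYY date strings into a form pandas parses unambiguously, and a NaN time is left as None rather than invented. A small illustration of the string handling on its own:

import pandas as pd

dt = '020620'                               # MMDDYY, i.e. Feb 6, 2020
iso = f'20{dt[-2:]}-{dt[0:2]}-{dt[2:4]}'    # -> '2020-02-06'
print(pd.to_datetime(iso).date())           # 2020-02-06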
20 changes: 13 additions & 7 deletions snowex_db/metadata.py
@@ -321,13 +321,19 @@ class DataHeader(object):
'long': 'longitude',
'lon': 'longitude',
'twt': 'two_way_travel',
'twt_ns': 'two_way_travel',
'utmzone': 'utm_zone',
'measurement_tool': 'instrument',
'avgdensity': 'density',
'avg_density': 'density',
'dielectric_constant': 'permittivity',
'flag': 'flags',
'hs': 'depth'
'hs': 'depth',
'swe_mm': 'swe',
'depth_m': 'depth',
'date_dd_mmm_yy': 'date',
'time_gmt': 'time',
'elev_m': 'elevation'
}

# Known possible profile types; anything not in here will throw an error
@@ -339,12 +345,12 @@ class DataHeader(object):

# Defaults to keywords arguments
defaults = {
'in_timezone': None,
'out_timezone': 'UTC',
'epsg': None,
'header_sep': ',',
'northern_hemisphere': True,
'depth_is_metadata': True}
'in_timezone': None,
'out_timezone': 'UTC',
'epsg': None,
'header_sep': ',',
'northern_hemisphere': True,
'depth_is_metadata': True}

def __init__(self, filename, **kwargs):
"""
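The new rename entries are what let the raw GPR headers (TWT_ns, SWE_mm, DATE_dd_mmm_yy, ...) resolve to the standard column names. A standalone sketch of the effect, assuming headers are lower-cased before the lookup as DataHeader does elsewhere:

rename = {
    'twt_ns': 'two_way_travel',
    'swe_mm': 'swe',
    'depth_m': 'depth',
    'date_dd_mmm_yy': 'date',
    'time_gmt': 'time',
    'elev_m': 'elevation',
}
raw = ['DATE_dd_mmm_yy', 'TIME_GMT', 'TWT_ns', 'DEPTH_m', 'SWE_mm', 'ELEV_m']
# Lower-case each header, then map it to the standard name if one exists
standardized = [rename.get(c.lower(), c.lower()) for c in raw]
print(standardized)
# ['date', 'time', 'two_way_travel', 'depth', 'swe', 'elevation']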
6 changes: 3 additions & 3 deletions snowex_db/upload.py
@@ -98,7 +98,7 @@ def _read(self, profile_filename):
df['depth'] = new_depth

delta = abs(df['depth'].max() - df['depth'].min())
self.log.info('File contains {} profiles each with {} layers across '
self.log.info('File contains {} profiles each with {:,} layers across '
'{:0.2f} cm'.format(len(self.hdr.data_names), len(df), delta))
return df

@@ -354,7 +354,7 @@ def submit(self, session):
for pt in self.hdr.data_names:
objects = []
df = self.build_data(pt)
self.log.info('Submitting {} points of {} to the database...'.format(
self.log.info('Submitting {:,} points of {} to the database...'.format(
len(df.index), pt))

for i, row in df.iterrows():
@@ -543,7 +543,7 @@ def submit(self, session):
if len(tiles) > 1:
# -1 because the first element is not a
self.log.info(
'Raster is split into {} tiles for uploading...'.format(
'Raster is split into {:,} tiles for uploading...'.format(
len(tiles)))

# Allow for tiling, the first split is always psql statement we don't
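The upload.py changes only add thousands separators to the log messages, for example:

print('Submitting {:,} points of {} to the database...'.format(125000, 'depth'))
# Submitting 125,000 points of depth to the database...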
7 changes: 7 additions & 0 deletions tests/data/unm_gpr.csv
@@ -0,0 +1,7 @@
DATE_dd_mmm_yy,TIME_GMT,FREQ_MHz,LONG,LAT,ELEV_m,NORTHING,EASTING,UTMzone,TWT_ns,DEPTH_m,SWE_mm
28-Jan-20,16:43,800,-108.1340183,39.0296597,3157.1,4323978.711,748088.3947,12,8.97,1.09,299
29-Jan-20,20:35,800,-108.139506,39.014194,3106.94,4322247.041,747667.2868,12,7.34,0.9,244
30-Jan-20,22:20,800,-108.1622217,39.03095167,3110.8,4324045.489,745642.1231,12,9.55,1.17,318
31-Jan-20,20:04,800,-108.1762593,39.02676284,3090.19,4323542.671,744441.2436,12,8.73,1.07,291
4-Feb-20,16:00,1600,-108.1391578,39.03117433,3141.17,4324132.814,747638.1351,12,7.27,0.89,242
5-Feb-20,19:04,1600,-108.1715773,39.02401467,3103.3,4323250.231,744856.1064,12,6.15,0.75,205
5 changes: 5 additions & 0 deletions tests/test_interpretation.py
@@ -52,6 +52,11 @@ def test_cardinal_to_degrees_value_error():
time(hour=8, tzinfo=pytz.utc)),
({'date/local standard time': '2019-12-20T13:00'}, 'US/Pacific', date(2019, 12, 20),
time(hour=21, minute=0, tzinfo=pytz.utc)),
# Test CSU GPR time
({'date': '020620', 'time': 'nan'}, 'US/Mountain', date(2020, 2, 6), None),
# Test Mala GPR time
({'date': '28-Jan-20', 'time': '16:07'}, 'UTC', date(2020, 1, 28), time(16, 7, tzinfo=pytz.utc)),
])
def test_add_date_time_keys(data, in_tz, expected_date, expected_time):
"""
20 changes: 20 additions & 0 deletions tests/test_metadata.py
@@ -279,6 +279,26 @@ def setup_class(self):
super().setup_class(self)


class TestUNMGPRHeader(DataHeaderTestBase):
"""
Test that the header information in the UNM GPR data is interpreted correctly
"""
depth_is_metadata = False

def setup_class(self):
self.file = 'unm_gpr.csv'
self.data_names = ['depth', 'swe', 'two_way_travel']
self.columns = ['date', 'time', 'utm_zone', 'easting', 'latitude', 'longitude',
'northing', 'elevation', 'freq_mhz'] + self.data_names

self.multi_sample_profiles = []

# no header in the GPR file
self.info = {}

super().setup_class(self)


class TestSMPHeader(DataHeaderTestBase):
"""
Test interpreting an SMP header without a SMP Log file
Expand Down
