-
Notifications
You must be signed in to change notification settings - Fork 91
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
STOFS3D-Atl-shadow: added scripts to generate river inputs
- Loading branch information
Showing
5 changed files
with
226 additions
and
0 deletions.
There are no files selected for viewing
8 changes: 8 additions & 0 deletions
8
src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/README
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
Generate river inputs for St Lawrence river: | ||
flux.th | ||
- download_flow.sh (get river discharge data) | ||
- python gen_fluxth.py | ||
|
||
TEM_1.th | ||
- python get_realtime_temp_crawl.py (download water temperature data from CCG. In the winter, there may be no data because the water is frozen) | ||
- python gen_TEM_1.py |
4 changes: 4 additions & 0 deletions
4
src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/download_flow.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/csh
# Fetch the latest St. Lawrence River hourly discharge data (MSC Datamart,
# station QC 02OA016) and save it with today's date stamped in the filename.
set idate = `/bin/date --date='0 days ago' +%Y-%m-%d`
echo $idate
wget -O St_Lawrence_river_discharge_${idate}.csv https://dd.meteo.gc.ca/hydrometric/csv/QC/hourly/QC_02OA016_hourly_hydrometric.csv
90 changes: 90 additions & 0 deletions
90
src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_TEM_1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import argparse | ||
from datetime import datetime, timedelta | ||
import glob | ||
import os | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
def get_river_obs(df, datevectors, datevectors2):
    """Build a daily-mean water-temperature series from raw CCG observations.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw observations; column 0 is 'date' (local US/Eastern time) and
        column 2 is 't' (water temperature).  Columns 1, 3, 4 and 5 are
        discarded.  The frame is modified in place.
    datevectors : pandas.DatetimeIndex
        Dates (tz-aware, UTC) that must be covered by the observations.
    datevectors2 : pandas.DatetimeIndex
        Full output range; dates beyond `datevectors` are padded by
        persisting the last observed value.

    Returns
    -------
    list of float
        One daily-mean temperature per entry of `datevectors2`.

    Raises
    ------
    ValueError
        If `datevectors` is empty.
    """
    if len(datevectors) == 0:
        raise ValueError('datevectors is empty')
    df.drop(df.columns[[1, 3, 4, 5]], axis=1, inplace=True)
    # Observation timestamps are local (US/Eastern); convert to UTC so the
    # daily averages line up with the model's UTC days.
    ts = pd.to_datetime(pd.Series(df['date'].values))
    ts2 = ts.dt.tz_localize('US/Eastern', ambiguous='infer', nonexistent='shift_forward')
    ts3 = ts2.dt.tz_convert('UTC')
    df.insert(1, 'date_utc', ts3)
    df.set_index('date_utc', inplace=True)
    # Drop the raw local-time column before averaging: resample().mean()
    # raises on non-numeric columns with pandas >= 2.0.
    df.drop(columns=['date'], inplace=True)
    df_daily = df.resample('D', closed='left').mean()
    # Fill gaps (e.g. sensor outages) by linear interpolation.
    if df_daily['t'].isnull().sum() > 0:
        df_daily['t'] = df_daily['t'].interpolate()

    data = []
    for i, dt in enumerate(datevectors):
        data.append(float(df_daily.loc[dt, 't']))

    # Persist the last observed value over the forecast extension.
    # (A stray duplicate append here previously made the list one entry
    # longer than datevectors2; removed for consistency with gen_fluxth.py.)
    for dt in datevectors2[i+1:]:
        data.append(data[-1])
    return data
|
||
if __name__ == '__main__':
    '''
    Usage: python gen_TEM_1.py startdate enddate

    Assume you have all QC_waterT_*.csv files in the current folder.
    The "enddate" should be the second date in the latest filename. For example,
    if the latest filename is "QC_waterT_2023-07-19_2023-07-26.csv", then
    enddate should be 2023-07-26.  The script extends 5 days into the future
    from enddate, persisting the last observed temperature.

    Example: python gen_TEM_1.py 2023-07-01 2023-07-26
    '''

    # input parameters
    argparser = argparse.ArgumentParser()
    argparser.add_argument('startdate', type=datetime.fromisoformat, help='input startdate')
    argparser.add_argument('enddate', type=datetime.fromisoformat, help='input enddate')
    args = argparser.parse_args()
    startDT = args.startdate
    endDT = args.enddate

    # Dates with actual observations (tz-aware so they can index the
    # UTC-resampled daily series in get_river_obs).
    datevectors = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT.strftime('%Y-%m-%d'), tz='UTC')
    # Full range written to TEM_1.th: observations plus a 5-day extension.
    endDT2 = endDT + timedelta(days=5)
    datevectors2 = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT2.strftime('%Y-%m-%d'), tz='UTC')

    # Combine all downloaded csv files (later files override duplicate dates).
    files = glob.glob('QC_waterT_*.csv')
    if not files:
        raise FileNotFoundError('No QC_waterT_*.csv files found in the current folder!')
    files.sort()

    # Check that the requested period is covered by the available files;
    # filenames are QC_waterT_<startdate>_<enddate>.csv.
    date0 = files[0].split('_')[2]
    date1 = files[-1].split('_')[3].split('.')[0]
    if startDT < datetime.strptime(date0, '%Y-%m-%d'):
        raise ValueError(f'startdate {startDT} is ahead of date {date0} in available files!')
    if endDT > datetime.strptime(date1, '%Y-%m-%d'):
        raise ValueError(f'enddate {endDT} exceeds date {date1} in available files!')

    df = pd.concat(map(pd.read_csv, files), ignore_index=True)
    df.drop_duplicates(subset='date', keep='last', inplace=True, ignore_index=True)

    temp = {}
    # St. Lawrence River
    temp['SL'] = get_river_obs(df, datevectors, datevectors2)

    rivers = ['SL']
    # Write TEM_1.th: column 0 is seconds since startdate, then one
    # temperature column per river.
    data = []
    for i, date in enumerate(datevectors2):
        line = []
        dt = (date - datevectors[0]).total_seconds()
        print(f'time = {dt}')
        line.append(dt)
        for riv in rivers:
            line.append(temp[riv][i])
        data.append(line)

    newset = np.array(data)
    np.savetxt('TEM_1.th', newset, fmt='%.2f')
93 changes: 93 additions & 0 deletions
93
src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_fluxth.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
import argparse | ||
from datetime import datetime, timedelta | ||
import os | ||
import glob | ||
|
||
import numpy as np | ||
import pandas as pd | ||
|
||
def get_river_obs(df, datevectors, datevectors2):
    """Build a daily-mean river-discharge series from raw hydrometric data.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw CSV rows; column 1 is the observation timestamp (ISO 8601 with a
        UTC offset, so parsing yields tz-aware values) and column 6 is the
        discharge.  All other columns are discarded.  The frame is modified
        in place.
    datevectors : pandas.DatetimeIndex
        Dates (tz-aware, UTC) that must be covered by the observations.
    datevectors2 : pandas.DatetimeIndex
        Full output range; dates beyond `datevectors` are padded by
        persisting the last observed value.

    Returns
    -------
    list of float
        One daily-mean discharge (rounded to 3 decimals) per entry of
        `datevectors2`.

    Raises
    ------
    ValueError
        If `datevectors` is empty.
    """
    if len(datevectors) == 0:
        raise ValueError('datevectors is empty')
    df.drop(df.columns[[0, 2, 3, 4, 5, 7, 8, 9]], axis=1, inplace=True)
    df.rename(columns={df.columns[0]: 'date_local', df.columns[1]: 'flow'}, inplace=True)
    # Timestamps already carry a UTC offset; convert to UTC so the daily
    # averages line up with the model's UTC days.
    ts = pd.to_datetime(pd.Series(df['date_local'].values))
    ts3 = ts.dt.tz_convert('UTC')
    df.insert(1, 'date_utc', ts3)
    df.set_index('date_utc', inplace=True)
    # Drop the raw timestamp column before averaging: resample().mean()
    # raises on non-numeric columns with pandas >= 2.0.
    df.drop(columns=['date_local'], inplace=True)

    df_daily = df.resample('D', closed='left').mean()
    # Fill gaps (e.g. missing reports) by linear interpolation.
    if df_daily['flow'].isnull().sum() > 0:
        df_daily['flow'] = df_daily['flow'].interpolate()

    data = []
    for i, dt in enumerate(datevectors):
        data.append(round(float(df_daily.loc[dt, 'flow']), 3))

    # Persist the last observed value over the forecast extension.
    for dt in datevectors2[i+1:]:
        data.append(data[-1])
    return data
|
||
if __name__ == '__main__':
    '''
    Usage: python gen_fluxth.py startdate enddate

    Assume you have all St_Lawrence_river_discharge_*.csv files in the
    current folder.  The "enddate" should be the date in the latest filename.
    For example, if the latest filename is
    "St_Lawrence_river_discharge_2023-07-26.csv", then enddate should be
    2023-07-26.  The script extends 5 days into the future from enddate,
    persisting the last observed discharge.

    Example: python gen_fluxth.py 2023-07-01 2023-07-26
    '''
    # input parameters
    argparser = argparse.ArgumentParser()
    argparser.add_argument('startdate', type=datetime.fromisoformat, help='input startdate')
    argparser.add_argument('enddate', type=datetime.fromisoformat, help='input enddate')
    args = argparser.parse_args()
    startDT = args.startdate
    endDT = args.enddate

    # Dates with actual observations (tz-aware so they can index the
    # UTC-resampled daily series in get_river_obs).
    datevectors = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT.strftime('%Y-%m-%d'), tz='UTC')
    # Full range written to flux.th: observations plus a 5-day extension.
    endDT2 = endDT + timedelta(days=5)
    datevectors2 = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT2.strftime('%Y-%m-%d'), tz='UTC')

    # Combine all downloaded csv files (later files override duplicate dates).
    files = glob.glob('St_Lawrence_river_discharge_*.csv')
    if not files:
        raise FileNotFoundError('No St_Lawrence_river_discharge_*.csv files found in the current folder!')
    files.sort()

    # Check that the requested period is covered by the available files.
    # Each daily download holds a few days of history, so startdate may be
    # up to 3 days earlier than the first file's date.
    date0 = files[0].split('_')[-1].split('.')[0]
    date1 = datetime.strptime(date0, '%Y-%m-%d') - timedelta(days=3)
    date2 = files[-1].split('_')[-1].split('.')[0]
    if startDT < date1:
        raise ValueError(f'startdate {startDT} is ahead of date {date1} in available files!')
    if endDT > datetime.strptime(date2, '%Y-%m-%d'):
        raise ValueError(f'enddate {endDT} exceeds date {date2} in available files!')

    df = pd.concat(map(pd.read_csv, files), ignore_index=True)
    df.drop_duplicates(subset='Date', keep='last', inplace=True, ignore_index=True)

    flow = {}
    # St. Lawrence River
    flow['SL'] = get_river_obs(df, datevectors, datevectors2)

    rivers = ['SL']
    # Write flux.th: column 0 is seconds since startdate, then one column per
    # river.  Discharge is negated here — flux.th appears to use negative
    # values for inflow (SCHISM convention); confirm against the model docs.
    data = []
    for i, date in enumerate(datevectors2):
        line = []
        dt = (date - datevectors[0]).total_seconds()
        print(f'time = {dt}')
        line.append(dt)
        for riv in rivers:
            line.append(-flow[riv][i])
        data.append(line)

    newset = np.array(data)
    np.savetxt('flux.th', newset, fmt='%.3f')
31 changes: 31 additions & 0 deletions
31
...y/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/get_realtime_temp_crawl.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import json
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

data = []

# Scrape the Canadian Coast Guard e-navigation page for recent water
# temperatures at Quebec stations and save them as
# QC_waterT_<start>_<end>.csv.
url = 'https://e-navigation.canada.ca/topics/water-levels/central/temperatures-en?type=recent&location=qc'
resp = requests.get(url)
soup = BeautifulSoup(resp.text, 'html.parser')
scripts = soup.find_all("script")
# The observations are embedded as a JS array literal inside the 10th
# <script> tag; cut out the text between "= [" and "];".
# NOTE(review): brittle — breaks if the page layout changes.
table = scripts[9].contents[0].split('\r\n')[2].split("= [")[1].split("];")[0]

# Splitting on '},' strips the closing brace from every record except the
# last; restore it before parsing each record as JSON.
dicts = table.split('},')
total = len(dicts)
if total <= 1:
    # e.g. in winter there may be no data because the water is frozen.
    # Exit cleanly instead of crashing on an empty DataFrame below.
    raise SystemExit('No data available!')
# Bug fix: this used to be "while (total > 1):", which never terminated
# because `total` is constant inside the loop; a single pass suffices.
for i, elem in enumerate(dicts):
    if i + 1 != total:
        data.append(json.loads(elem + '}'))
    else:
        data.append(json.loads(elem))

df = pd.DataFrame(data)
df.set_index('date', inplace=True)
# Name the output file after the first and last observation dates
# (the 'date' strings are "YYYY-MM-DD HH:MM"-like; keep the date part).
start_date = df.index[0].split(' ')[0]
print(start_date)
end_date = df.index[-1].split(' ')[0]
print(end_date)
df.to_csv(f'QC_waterT_{start_date}_{end_date}.csv')