Skip to content

Commit

Permalink
STOFS3D-Atl-shadow: added scripts to generate river inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
cuill committed Dec 20, 2023
1 parent a542f23 commit 835bed9
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Generate river inputs for St Lawrence river:
flux.th
- download_flow.sh (get river discharge data)
- python gen_fluxth.py

TEM_1.th
- python get_realtime_temp_crawl.py (downloads water temperature data from CCG; in winter there may be no data because the water is frozen)
- python gen_TEM_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/csh
# Download the hourly hydrometric CSV for station QC_02OA016 and store it
# under a name stamped with today's date (YYYY-MM-DD).
set today = `/bin/date +%Y-%m-%d`
echo $today
set outfile = "St_Lawrence_river_discharge_${today}.csv"
wget -O "$outfile" https://dd.meteo.gc.ca/hydrometric/csv/QC/hourly/QC_02OA016_hourly_hydrometric.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import argparse
from datetime import datetime, timedelta
import glob
import os

import numpy as np
import pandas as pd

def _to_utc(stamp):
    """Return *stamp* as a UTC-aware ``pd.Timestamp`` (naive input is taken as UTC)."""
    stamp = pd.Timestamp(stamp)
    if stamp.tzinfo is None:
        return stamp.tz_localize('UTC')
    return stamp.tz_convert('UTC')


def get_river_obs(df, datevectors, datevectors2):
    """Build a daily water-temperature series covering ``datevectors2``.

    The ``date`` column of *df* is read as US/Eastern local time, converted
    to UTC and averaged per UTC day. Days in ``datevectors`` are looked up
    in the daily means; the remaining days of ``datevectors2`` repeat the
    last looked-up value.

    Args:
        df: Observations with at least six columns, including ``date``
            (local time stamps) and ``t`` (numeric). The columns at
            positions 1, 3, 4 and 5 are ignored. The frame is not modified.
        datevectors: Days that must be present in the observations.
        datevectors2: Output days; expected to start with ``datevectors``.

    Returns:
        list[float]: one value per entry of ``datevectors2`` followed by one
        extra trailing copy of the last value.

    Raises:
        ValueError: if ``datevectors`` is empty.
        KeyError: if a day of ``datevectors`` has no daily mean.
    """
    if len(datevectors) == 0:
        raise ValueError('datevectors must contain at least one date')

    # Non in-place drop: the caller's frame stays untouched.
    frame = df.drop(df.columns[[1, 3, 4, 5]], axis=1)

    local_time = pd.to_datetime(pd.Series(frame['date'].values))
    utc_time = local_time.dt.tz_localize(
        'US/Eastern', ambiguous='infer', nonexistent='shift_forward'
    ).dt.tz_convert('UTC')
    # Assign positionally so a non-default index on df cannot misalign rows.
    frame.index = pd.DatetimeIndex(utc_time, name='date_utc')

    # Average numeric columns only; recent pandas raises on .mean() over the
    # string 'date' column instead of silently dropping it.
    daily = frame.select_dtypes(include='number').resample('D', closed='left').mean()

    # Fill days without observations.
    if daily['t'].isnull().any():
        daily['t'] = daily['t'].interpolate()

    # The daily index is UTC-aware, so the (usually naive) lookup keys must
    # be made UTC-aware too; recent pandas rejects naive keys with KeyError.
    data = [float(daily.loc[_to_utc(day), 't']) for day in datevectors]

    # Persist the last observed value over the days beyond datevectors.
    n_extra = max(len(datevectors2) - len(datevectors), 0)
    data.extend([data[-1]] * n_extra)
    # One additional trailing copy, so the list is one element longer than
    # datevectors2.
    data.append(data[-1])
    return data

if __name__ == '__main__':
    # Usage:
    #     python gen_TEM_1_rescue.py startdate enddate
    # NOTE(review): the accompanying README calls this script gen_TEM_1.py;
    # confirm which name is current.
    #
    # All QC_waterT_*.csv files are expected in the current folder.
    # "enddate" should be the second date in the latest filename. For
    # example, if the latest file is "QC_waterT_2023-07-19_2023-07-26.csv",
    # enddate should be 2023-07-26. The output is extended 5 days past
    # enddate.
    #     python gen_TEM_1_rescue.py 2023-07-01 2023-07-26

    # input parameters
    argparser = argparse.ArgumentParser()
    argparser.add_argument('startdate', type=datetime.fromisoformat, help='input startdate')
    argparser.add_argument('enddate', type=datetime.fromisoformat, help='input enddate')
    args = argparser.parse_args()
    startDT = args.startdate
    endDT = args.enddate

    # days that must be covered by observations
    datevectors = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT.strftime('%Y-%m-%d'))
    # datevectors2 - days actually written to TEM_1.th (5 days longer)
    endDT2 = endDT + timedelta(days=5)
    datevectors2 = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT2.strftime('%Y-%m-%d'))

    # collect csv files; names sort chronologically by their first date
    files = sorted(glob.glob('QC_waterT_*.csv'))
    if not files:
        # Fail with a clear message instead of an IndexError on files[0].
        raise FileNotFoundError('No QC_waterT_*.csv files found in the current folder!')

    # check that the requested period lies within the dates in the filenames
    date0 = files[0].split('_')[2]
    date1 = files[-1].split('_')[3].split('.')[0]
    if startDT < datetime.strptime(date0, '%Y-%m-%d'):
        raise ValueError(f'startdate {startDT} is ahead of date {date0} in available files!')
    if endDT > datetime.strptime(date1, '%Y-%m-%d'):
        raise ValueError(f'enddate {endDT} exceeds date {date1} in available files!')

    # merge all files; for repeated time stamps the last file wins
    df = pd.concat(map(pd.read_csv, files), ignore_index=True)
    df.drop_duplicates(subset='date', keep='last', inplace=True, ignore_index=True)

    temp = {}
    # get st lawrence river
    temp['SL'] = get_river_obs(df, datevectors, datevectors2)

    rivers = ['SL']
    # write file: first column is seconds since the first day, then one
    # column per river
    data = []
    for i, date in enumerate(datevectors2):
        dt = (date - datevectors[0]).total_seconds()
        print(f'time = {dt}')
        data.append([dt] + [temp[riv][i] for riv in rivers])

    newset = np.array(data)
    np.savetxt('TEM_1.th', newset, fmt='%.2f')
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import argparse
from datetime import datetime, timedelta
import os
import glob

import numpy as np
import pandas as pd

def _to_utc(stamp):
    """Return *stamp* as a UTC-aware ``pd.Timestamp`` (naive input is taken as UTC)."""
    stamp = pd.Timestamp(stamp)
    if stamp.tzinfo is None:
        return stamp.tz_localize('UTC')
    return stamp.tz_convert('UTC')


def get_river_obs(df, datevectors, datevectors2):
    """Build a daily discharge series covering ``datevectors2``.

    Only the columns at positions 1 and 6 of *df* are used; they are treated
    as the time stamp and the flow value. Time stamps are converted to UTC
    and the flow is averaged per UTC day. Days in ``datevectors`` are looked
    up in the daily means (rounded to 3 decimals); the remaining days of
    ``datevectors2`` repeat the last looked-up value.

    Args:
        df: Observations with at least ten columns. The frame is not
            modified.
        datevectors: Days that must be present in the observations.
        datevectors2: Output days; expected to start with ``datevectors``.

    Returns:
        list[float]: one value per entry of ``datevectors2``.

    Raises:
        ValueError: if ``datevectors`` is empty.
        KeyError: if a day of ``datevectors`` has no daily mean.
    """
    if len(datevectors) == 0:
        raise ValueError('datevectors must contain at least one date')

    # Non in-place drop/rename: the caller's frame stays untouched.
    frame = df.drop(df.columns[[0, 2, 3, 4, 5, 7, 8, 9]], axis=1)
    frame = frame.rename(columns={frame.columns[0]: 'date_local', frame.columns[1]: 'flow'})

    # utc=True converts straight to UTC and also copes with time stamps that
    # carry different UTC offsets (e.g. across a daylight-saving change).
    utc_time = pd.to_datetime(pd.Series(frame['date_local'].values), utc=True)
    # Assign positionally so a non-default index on df cannot misalign rows.
    frame.index = pd.DatetimeIndex(utc_time, name='date_utc')

    # Average numeric columns only; recent pandas raises on .mean() over the
    # string 'date_local' column instead of silently dropping it.
    daily = frame.select_dtypes(include='number').resample('D', closed='left').mean()

    # Fill days without observations.
    if daily['flow'].isnull().any():
        daily['flow'] = daily['flow'].interpolate()

    # The daily index is UTC-aware, so the (usually naive) lookup keys must
    # be made UTC-aware too; recent pandas rejects naive keys with KeyError.
    data = [round(float(daily.loc[_to_utc(day), 'flow']), 3) for day in datevectors]

    # Persist the last observed value over the days beyond datevectors.
    n_extra = max(len(datevectors2) - len(datevectors), 0)
    data.extend([data[-1]] * n_extra)
    return data

if __name__ == '__main__':
    # Usage:
    #     python gen_fluxth.py startdate enddate
    #
    # All St_Lawrence_river_discharge_*.csv files are expected in the
    # current folder. "enddate" should be the date in the latest filename.
    # For example, if the latest file is
    # "St_Lawrence_river_discharge_2023-07-26.csv", enddate should be
    # 2023-07-26. The output is extended 5 days past enddate.
    #     python gen_fluxth.py 2023-07-01 2023-07-26

    # input parameters
    argparser = argparse.ArgumentParser()
    argparser.add_argument('startdate', type=datetime.fromisoformat, help='input startdate')
    argparser.add_argument('enddate', type=datetime.fromisoformat, help='input enddate')
    args = argparser.parse_args()
    startDT = args.startdate
    endDT = args.enddate

    # days that must be covered by observations
    datevectors = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT.strftime('%Y-%m-%d'))
    # datevectors2 - days actually written to flux.th (5 days longer)
    endDT2 = endDT + timedelta(days=5)
    datevectors2 = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT2.strftime('%Y-%m-%d'))

    # collect csv files; names sort chronologically by their date stamp
    files = sorted(glob.glob('St_Lawrence_river_discharge_*.csv'))
    if not files:
        # Fail with a clear message instead of an IndexError on files[0].
        raise FileNotFoundError('No St_Lawrence_river_discharge_*.csv files found in the current folder!')

    # check that the requested period lies within the dates in the filenames;
    # the earliest accepted start is 3 days before the first file's date
    date0 = files[0].split('_')[-1].split('.')[0]
    date1 = datetime.strptime(date0, '%Y-%m-%d') - timedelta(days=3)
    date2 = files[-1].split('_')[-1].split('.')[0]
    if startDT < date1:
        raise ValueError(f'startdate {startDT} is ahead of date {date1} in available files!')
    if endDT > datetime.strptime(date2, '%Y-%m-%d'):
        raise ValueError(f'enddate {endDT} exceeds date {date2} in available files!')

    # merge all files; for repeated time stamps the last file wins
    df = pd.concat(map(pd.read_csv, files), ignore_index=True)
    df.drop_duplicates(subset='Date', keep='last', inplace=True, ignore_index=True)

    flow = {}
    # get st Lawrence river
    flow['SL'] = get_river_obs(df, datevectors, datevectors2)

    rivers = ['SL']
    # write file: first column is seconds since the first day, then one
    # column per river holding the sign-flipped flow
    data = []
    for i, date in enumerate(datevectors2):
        dt = (date - datevectors[0]).total_seconds()
        print(f'time = {dt}')
        data.append([dt] + [-flow[riv][i] for riv in rivers])

    newset = np.array(data)
    np.savetxt('flux.th', newset, fmt='%.3f')
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import json
import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd

# Download the recent water-temperature page, pull the observation records
# out of the JavaScript embedded in it and save them as
# QC_waterT_<first date>_<last date>.csv.
data = []

url = 'https://e-navigation.canada.ca/topics/water-levels/central/temperatures-en?type=recent&location=qc'
# A timeout keeps the script from hanging on a stalled connection, and an
# HTTP error is reported as such instead of failing later while parsing.
resp = requests.get(url, timeout=60)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, 'html.parser')
scripts = soup.find_all("script")
# Assumes the records sit on the third line of the tenth <script> element as
# "... = [ {...},{...} ];" -- this depends on the page layout and needs
# adjusting if the page changes.
table = scripts[9].contents[0].split('\r\n')[2].split("= [")[1].split("];")[0]

# Parse the comma-separated objects as one JSON array; this also works when
# "}," occurs inside a record.
data = json.loads('[' + table + ']')

if not data:
    print('No data available!')
else:
    df = pd.DataFrame(data)
    df.set_index('date', inplace=True)
    # The 'date' values are "<date> <time>" strings; keep the date part only.
    start_date = df.index[0].split(' ')[0]
    print(start_date)
    end_date = df.index[-1].split(' ')[0]
    print(end_date)
    df.to_csv(f'QC_waterT_{start_date}_{end_date}.csv')

0 comments on commit 835bed9

Please sign in to comment.