diff --git a/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/README b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/README
new file mode 100644
index 000000000..4bf2f79b2
--- /dev/null
+++ b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/README
@@ -0,0 +1,8 @@
+Generate river inputs for St Lawrence river:
+flux.th
+  - download_flow.sh (get river discharge data)
+  - python gen_fluxth.py
+
+TEM_1.th
+  - python get_realtime_temp_crawl.py (download water temperature data from CCG. In the winter, there may be no data because the water is frozen)
+  - python gen_TEM_1.py
diff --git a/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/download_flow.sh b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/download_flow.sh
new file mode 100755
index 000000000..871baf91f
--- /dev/null
+++ b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/download_flow.sh
@@ -0,0 +1,4 @@
+#!/bin/csh
+set idate=`/bin/date --date='0 days ago' +%Y-%m-%d`
+echo $idate
+wget -O St_Lawrence_river_discharge_${idate}.csv https://dd.meteo.gc.ca/hydrometric/csv/QC/hourly/QC_02OA016_hourly_hydrometric.csv
diff --git a/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_TEM_1.py b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_TEM_1.py
new file mode 100644
index 000000000..65fd36736
--- /dev/null
+++ b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_TEM_1.py
@@ -0,0 +1,90 @@
+import argparse
+from datetime import datetime, timedelta
+import glob
+import os
+
+import numpy as np
+import pandas as pd
+
+def get_river_obs(df, datevectors, datevectors2):
+    #df = pd.read_csv(fname, sep=',', na_values='')
+    df.drop(df.columns[[1, 3, 4, 5]],axis=1, inplace=True)
+    #df.rename(columns={df.columns[0]: 'date_local', df.columns[1]: 'flow'}, inplace=True)
+    ts = pd.to_datetime(pd.Series(df['date'].values))
+    ts2 = ts.dt.tz_localize('US/Eastern', ambiguous='infer', nonexistent='shift_forward')
+    ts3 = ts2.dt.tz_convert('UTC')
+    df.insert(1, 'date_utc', ts3)
+    #df.drop('date', inplace=True)
+    df.set_index('date_utc', inplace=True)
+    df_daily = df.resample('D', closed='left').mean()
+    #check missing values
+    if df_daily['t'].isnull().sum() > 0:
+        df_daily['t'] = df_daily['t'].interpolate()
+
+    data = []
+    for i, dt in enumerate(datevectors):
+        data.append(float(df_daily.loc[dt].values[0]))
+
+    for dt in datevectors2[i+1:]:
+        data.append(data[-1])
+    data.append(data[-1])
+    return data
+
+if __name__ == '__main__':
+    '''
+    python gen_TEM_1.py startdate enddate
+
+    Assume you have all QC_waterT_*.csv files in the current folder.
+    The "enddate" should be the second date in the latest filename. For example:
+    the latest filename is "QC_waterT_2023-07-19_2023-07-26.csv", then enddate should be 2023-07-26. The
+    script will extend 5 days further from enddate.
+    python gen_TEM_1.py 2023-07-01 2023-07-26
+    '''
+
+    #input parameters
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument('startdate', type=datetime.fromisoformat, help='input startdate')
+    argparser.add_argument('enddate', type=datetime.fromisoformat, help='input enddate')
+    args=argparser.parse_args()
+    startDT=args.startdate
+    endDT=args.enddate
+
+    datevectors = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT.strftime('%Y-%m-%d'))
+    #datevector2 - real time in TEM_1.th
+    endDT2 = endDT + timedelta(days=5)
+    datevectors2 = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT2.strftime('%Y-%m-%d'))
+
+    #combine csv files
+    files = glob.glob('QC_waterT_*.csv')
+    files.sort()
+
+    #check date
+    date0 = files[0].split('_')[2]
+    date1 = files[-1].split('_')[3].split('.')[0]
+    if startDT < datetime.strptime(date0, '%Y-%m-%d'):
+        raise ValueError(f'startdate {startDT} is ahead of date {date0} in available files!')
+    if endDT > datetime.strptime(date1, '%Y-%m-%d'):
+        raise ValueError(f'enddate {endDT} exceeds date {date1} in available files!')
+
+    df = pd.concat(map(pd.read_csv, files), ignore_index=True)
+    df.drop_duplicates(subset='date', keep='last', inplace=True, ignore_index=True)
+
+    temp = {}
+    #get st lawrence river
+    temp['SL'] = get_river_obs(df, datevectors, datevectors2)
+
+    rivers = ['SL']
+    #write file
+    data = []
+    for i, date in enumerate(datevectors2):
+        line = []
+        dt = (date - datevectors[0]).total_seconds()
+        print(f'time = {dt}')
+        line.append(dt)
+        for riv in rivers:
+            line.append(temp[riv][i])
+
+        data.append(line)
+
+    newset = np.array(data)
+    np.savetxt('TEM_1.th', newset, fmt='%.2f')
diff --git a/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_fluxth.py b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_fluxth.py
new file mode 100644
index 000000000..5f23b0830
--- /dev/null
+++ b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/gen_fluxth.py
@@ -0,0 +1,93 @@
+import argparse
+from datetime import datetime, timedelta
+import os
+import glob
+
+import numpy as np
+import pandas as pd
+
+def get_river_obs(df, datevectors, datevectors2):
+    #df = pd.read_csv(fname, sep=',', na_values='')
+    df.drop(df.columns[[0, 2, 3, 4, 5, 7, 8, 9]],axis=1, inplace=True)
+    df.rename(columns={df.columns[0]: 'date_local', df.columns[1]: 'flow'}, inplace=True)
+    ts = pd.to_datetime(pd.Series(df['date_local'].values))
+    #ts2 = ts.dt.tz_localize('US/Pacific', ambiguous='infer', nonexistent='shift_forward')
+    ts3 = ts.dt.tz_convert('UTC')
+    df.insert(1, 'date_utc', ts3)
+    #df.drop('date_local', inplace=True)
+    df.set_index('date_utc', inplace=True)
+
+    df_daily = df.resample('D', closed='left').mean()
+    #check missing values
+    if df_daily['flow'].isnull().sum() > 0:
+        df_daily['flow'] = df_daily['flow'].interpolate()
+
+    data = []
+    for i, dt in enumerate(datevectors):
+        data.append(round(float(df_daily.loc[dt]['flow']), 3))
+
+    for dt in datevectors2[i+1:]:
+        data.append(data[-1])
+    return data
+
+if __name__ == '__main__':
+
+    '''
+    python gen_fluxth.py startdate enddate
+
+    Assume you have all St_Lawrence_river_discharge_*.csv files in the current folder.
+    The "enddate" should be the date in the latest filename. For example:
+    the latest filename is "St_Lawrence_river_discharge_2023-07-26.csv",
+    then enddate should be 2023-07-26. The
+    script will extend 5 days further from enddate.
+    python gen_fluxth.py 2023-07-01 2023-07-26
+    '''
+    #input parameters
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument('startdate', type=datetime.fromisoformat, help='input startdate')
+    argparser.add_argument('enddate', type=datetime.fromisoformat, help='input enddate')
+    args=argparser.parse_args()
+    startDT=args.startdate
+    endDT=args.enddate
+
+    datevectors = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT.strftime('%Y-%m-%d'))
+    #datevector2 - real time in flux.th
+    endDT2 = endDT + timedelta(days=5)
+    datevectors2 = pd.date_range(start=startDT.strftime('%Y-%m-%d'), end=endDT2.strftime('%Y-%m-%d'))
+
+    #combine csv files
+    files = glob.glob('St_Lawrence_river_discharge_*.csv')
+    files.sort()
+
+    #check date
+    date0 = files[0].split('_')[-1].split('.')[0]
+    date1 = datetime.strptime(date0, '%Y-%m-%d') - timedelta(days=3)
+    date2 = files[-1].split('_')[-1].split('.')[0]
+    if startDT < date1:
+        raise ValueError(f'startdate {startDT} is ahead of date {date1} in available files!')
+    if endDT > datetime.strptime(date2, '%Y-%m-%d'):
+        raise ValueError(f'enddate {endDT} exceeds date {date2} in available files!')
+
+    df = pd.concat(map(pd.read_csv, files), ignore_index=True)
+    df.drop_duplicates(subset='Date', keep='last', inplace=True, ignore_index=True)
+
+    flow = {}
+    #get st Lawrence river
+    flow['SL'] = get_river_obs(df, datevectors, datevectors2)
+
+
+    rivers = ['SL']
+    #write file
+    data = []
+    for i, date in enumerate(datevectors2):
+        line = []
+        dt = (date - datevectors[0]).total_seconds()
+        print(f'time = {dt}')
+        line.append(dt)
+        for riv in rivers:
+            line.append(-flow[riv][i])
+
+        data.append(line)
+
+    newset = np.array(data)
+    np.savetxt('flux.th', newset, fmt='%.3f')
diff --git a/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/get_realtime_temp_crawl.py b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/get_realtime_temp_crawl.py
new file mode 100644
index 000000000..3588a3f69
--- /dev/null
+++ b/src/Utility/Pre-Processing/STOFS-3D-Atl-shadow-VIMS/Pre_processing/River/get_realtime_temp_crawl.py
@@ -0,0 +1,31 @@
+import json
+import requests
+from bs4 import BeautifulSoup
+import numpy as np
+import pandas as pd
+
+data = []
+
+url = 'https://e-navigation.canada.ca/topics/water-levels/central/temperatures-en?type=recent&location=qc'
+resp = requests.get(url)
+soup = BeautifulSoup(resp.text, 'html.parser')
+scripts=soup.find_all("script")
+table = scripts[9].contents[0].split('\r\n')[2].split("= [")[1].split("];")[0]
+
+dicts = table.split('},')
+total = len(dicts)
+if total <= 1: print('No data available!')
+if total > 1:
+    for i, elem in enumerate(dicts):
+        if i+1 != total:
+            data.append(json.loads(elem+'}'))
+        else:
+            data.append(json.loads(elem))
+
+    df = pd.DataFrame(data)
+    df.set_index('date', inplace=True)
+    start_date = df.index[0].split(' ')[0]
+    print(start_date)
+    end_date = df.index[-1].split(' ')[0]
+    print(end_date)
+    df.to_csv(f'QC_waterT_{start_date}_{end_date}.csv')