Skip to content

Commit

Permalink
fix PL price
Browse files Browse the repository at this point in the history
- Omit Polish price data reported in EUR (keeping PLN prices)
- Turn off interpolation for price data
- Fix CE(S)T timestamp column in Excel
  • Loading branch information
jgmill committed Jul 1, 2018
1 parent 3618095 commit e72c468
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 12 deletions.
10 changes: 5 additions & 5 deletions checksums.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
time_series.sqlite,4f5f7d2432cb4c48d29b062ce322b4517c4e82aa6841eb275070017a5cd46158
time_series.xlsx,3f03c3387da92cef1948154c79d347f52b9acb4726bf4b6b9f164ebf9ee8682a
time_series.sqlite,82418ddf346af9525101edf78224d9fbb2c9f25fc2d07801e8b707c1ab915b90
time_series.xlsx,b8d95fc579ae3cf60d00b5f5cffac7e98b3232b79891af7cee199306e0409b5f
time_series_15min_multiindex.csv,898cff6ff1276097e45a44d4beeb53066297a476af326d25748b9209b56f4ebe
time_series_15min_singleindex.csv,7bc97b7053f7895aea2a9b3901cfa149eeea4232e4461b6aef870c90be38f151
time_series_15min_stacked.csv,3b2510aa6cdb3897313003bc743b2be6b8cbf8cdd5b647d242a0cc6feeb3f93d
time_series_30min_multiindex.csv,1d652f39f755825aa340bbde09f783346bdc53221b5e67bed58474ba97fed412
time_series_30min_singleindex.csv,1d99520014a6ac8428056fd319d88c4489447cec5d6604f42358a9ea76469d79
time_series_30min_stacked.csv,21acd2c7c75da1657ea7821bad3a5b9f6c886642df6a1c27cc7fc72d69fbfb19
time_series_60min_multiindex.csv,f7467de31d1c0f710bba51f4647c1f644eb357f6a414f7e1b6d30b049d1257ef
time_series_60min_singleindex.csv,20dd12e4073eac0dd8150c34b8d6a0f38d77ec12abad6e59fa8c261837db0fd5
time_series_60min_stacked.csv,87ceb85ae256d68d8bd93b6b44aeebcce4e31c084e34966d348476db320c43bd
time_series_60min_multiindex.csv,5bfe62ffa0098629260637e8af795dabdbf8f2a9261350068e36642c39b3e3a4
time_series_60min_singleindex.csv,6f2fb8b24614f45377518fb6528dc1925aff456ba5705b79b1d4a4fb698867e7
time_series_60min_stacked.csv,0619a1fc2b9ee104f2609a9ea36361d8e5841d2d64e401ec46bf585362766168
10 changes: 7 additions & 3 deletions processing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1722,7 +1722,7 @@
}
},
"source": [
"Writing the full tables to Excel takes extremely long. As a workaround, only the first 5 rows are exported. The rest of the data can then be inserted manually from the `_multiindex.csv` files."
"Writing the full tables to Excel takes extremely long. As a workaround, only the timestamp-columns are exported. The rest of the data can then be inserted manually from the `_multiindex.csv` files."
]
},
{
Expand All @@ -1739,8 +1739,12 @@
"os.chdir(out_path)\n",
"writer = pd.ExcelWriter('time_series1.xlsx')\n",
"for res_key, df in data_sets_multiindex.items():\n",
" df.iloc[:, 0].to_excel(writer, res_key.split('_')[0], float_format='%.2f',\n",
" merge_cells=True)\n",
" # Need to convert CE(S)T-timestamps to tz-naive, otherwise Excel converts\n",
" # them back to UTC\n",
" excel_timestamps = df.loc[:,(info_cols['cet'], '', '', '', '', '')]\n",
" excel_timestamps = excel_timestamps.dt.tz_localize(None)\n",
" excel_timestamps.to_excel(writer, res_key.split('_')[0],\n",
" float_format='%.2f', merge_cells=True)\n",
" # merge_cells=False doesn't work properly with multiindex\n",
"writer.save()"
]
Expand Down
9 changes: 6 additions & 3 deletions timeseries_scripts/imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,21 +187,24 @@ def choose_fill_method(
'''
for i, nan_block in nan_blocks.iterrows():
j = 0
if col_name[1] == 'price':
# Do not interpolate prices
patched_col = col

# Interpolate missing value spans up to 2 hours
if nan_block['span'] <= timedelta(hours=2):
elif nan_block['span'] <= timedelta(hours=2):
patched_col, marker_col = my_interpolate(
i, j, nan_block, col, col_name, marker_col, nan_blocks,
one_period, message)

# Guess missing value spans longer than two hours based on other TSOs
# (Only for German wind and solar generation data)
elif col_name[1][:2] == 'DE' and col_name[2] == 'generation':
elif col_name[0][:2] == 'DE' and col_name[2] == 'generation_actual':

# NOT IMPLEMENTED
# patched_col = impute(nan_block, col, col_name, nan_blocks, df, one_period)
# instead:
pass
patched_col = col
else:
j += 1
patched_col = col
Expand Down
4 changes: 3 additions & 1 deletion timeseries_scripts/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,10 @@ def read_entso_e_transparency(
df_raw.replace({'ProductionType_Name': renewables}, inplace=True)

if variable_name == 'Day Ahead Prices':
# Omit Polish price data reported in EUR (keeping PLN prices)
# (Before 2017-03-02, the data is very messy)
no_polish_euro = ~((df_raw['AreaName'] == 'PSE SA BZ') &
(df_raw['Currency_IsoCode'] == 'EUR'))
(df_raw.index < pd.to_datetime('2017-03-02 00:00:00')))
df_raw = df_raw.loc[no_polish_euro]

# keep only entries for selected geographic entities as specified in
Expand Down

0 comments on commit e72c468

Please sign in to comment.