Commit

delete zeros before first/after last non-zero value in each column (2)
jgmill committed May 17, 2019
1 parent 7958fa4 commit 05656d5
Showing 2 changed files with 21 additions and 33 deletions.
39 changes: 14 additions & 25 deletions processing.ipynb
@@ -1201,7 +1201,7 @@
" # Calculate aggregate German generation\n",
" sum_frame = df.loc[:, (control_areas_DE, variable, attribute)]\n",
" sum_frame.head() \n",
" sum_col = sum_frame.sum(axis='columns', skipna=True).to_frame().round(0)\n",
" sum_col = sum_frame.sum(axis='columns', skipna=False).to_frame().round(0)\n",
"\n",
" # Create a new MultiIndex\n",
" new_col_header = {\n",
@@ -1244,7 +1244,7 @@
" sum_col = (\n",
" data_sets['60min']\n",
" .loc[:, (bidding_zones_IT, variable, attributes)]\n",
" .sum(axis='columns', skipna=True))\n",
" .sum(axis='columns', skipna=False))\n",
" \n",
" # Create a new MultiIndex\n",
" new_col_header = {\n",
@@ -1283,7 +1283,7 @@
"for variable in ['solar', 'wind']:\n",
" sum_col = (data_sets['30min']\n",
" .loc[:, ('GB_GBN', variable, ['generation_actual_dso', 'generation_actual_tso'])]\n",
" .sum(axis='columns', skipna=True))\n",
" .sum(axis='columns', skipna=False))\n",
" \n",
" # Create a new MultiIndex\n",
" new_col_header = {\n",
@@ -1524,8 +1524,8 @@
" else:\n",
" # Keep only region, variable, attribute in MultiIndex for comparison\n",
" # Compare columns from ENTSO-E against TSO's, keep which we don't have yet\n",
" tuples = [col for col in df.columns if not col[:3] in data_cols]\n",
" add_cols = pd.MultiIndex.from_tuples(tuples, names=headers)\n",
" cols = [col for col in df.columns if not col[:3] in data_cols]\n",
" add_cols = pd.MultiIndex.from_tuples(cols, names=headers)\n",
" data_sets[res_key] = data_sets[res_key].combine_first(df[add_cols])\n",
"\n",
"# # Add the ENTSO-E markers (but only for the columns actually copied)\n",
@@ -1623,9 +1623,9 @@
"data_sets['15min'].to_pickle('final_15.pickle')\n",
"data_sets['30min'].to_pickle('final_30.pickle')\n",
"data_sets['60min'].to_pickle('final_60.pickle')\n",
"entso_e['15min'].to_pickle('final_entso_e_15.pickle')\n",
"entso_e['30min'].to_pickle('final_entso_e_30.pickle')\n",
"entso_e['60min'].to_pickle('final_entso_e_60.pickle')"
"#entso_e['15min'].to_pickle('final_entso_e_15.pickle')\n",
"#entso_e['30min'].to_pickle('final_entso_e_30.pickle')\n",
"#entso_e['60min'].to_pickle('final_entso_e_60.pickle')"
]
},
{
@@ -1641,17 +1641,10 @@
"data_sets['15min'] = pd.read_pickle('final_15.pickle')\n",
"data_sets['30min'] = pd.read_pickle('final_30.pickle')\n",
"data_sets['60min'] = pd.read_pickle('final_60.pickle')\n",
"entso_e = {}\n",
"entso_e['15min'] = pd.read_pickle('final_entso_e_15.pickle')\n",
"entso_e['30min'] = pd.read_pickle('final_entso_e_30.pickle')\n",
"entso_e['60min'] = pd.read_pickle('final_entso_e_60.pickle')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Combine TSO and ENTSO-E datasets"
"#entso_e = {}\n",
"#entso_e['15min'] = pd.read_pickle('final_entso_e_15.pickle')\n",
"#entso_e['30min'] = pd.read_pickle('final_entso_e_30.pickle')\n",
"#entso_e['60min'] = pd.read_pickle('final_entso_e_60.pickle')"
]
},
{
@@ -1660,11 +1653,7 @@
"metadata": {},
"outputs": [],
"source": [
"combined = {}\n",
"\n",
"for res_key, df in entso_e.items():\n",
" # Combine with TSO data\n",
" combined[res_key] = data_sets[res_key].combine_first(df)"
"combined = data_sets"
]
},
{
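For reference, the removed cell relied on DataFrame.combine_first, which keeps the caller's values and only fills its missing entries from the other frame; the same call is still used further up when copying the missing ENTSO-E columns. A minimal sketch with made-up frames:

import numpy as np
import pandas as pd

tso = pd.DataFrame({'wind': [10.0, np.nan, 30.0]})
entso_e = pd.DataFrame({'wind': [11.0, 22.0, 33.0]})

print(tso.combine_first(entso_e)['wind'].tolist())  # [10.0, 22.0, 30.0]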
@@ -1830,7 +1819,7 @@
}
},
"source": [
"## Write to SQL-database"
"## Write to SQLite-database"
]
},
{
15 changes: 7 additions & 8 deletions timeseries_scripts/read.py
@@ -609,13 +609,6 @@ def read_opsd(filepath, param_dict, headers):
keep = ['wind', 'wind_onshore', 'wind_offshore', 'solar']
df = df.loc[:, (slice(None), keep)]

-    # delete zeros before first non-zero value in each column
-    for col_name, col in df.iteritems():
-        nan_for_zero = col.replace(0, np.nan)
-        slicer = ((col.index <= nan_for_zero.first_valid_index()) |
-                  (col.index >= nan_for_zero.last_valid_index()))
-        col.loc[slicer] = np.nan
-
# The capacities data only has one entry per day, which pandas
# interprets as 00:00h. We will broadcast the dayly data for
# all quarter-hours of the day until the next given data point.
@@ -1122,7 +1115,6 @@ def read_dataset(

# First call to update_progress
update_progress(files_success, files_existing, container)
-    # logger.debug(source_dataset_timerange + 'reading...')

# Select read function for source
if source_name == 'OPSD':
@@ -1175,6 +1167,13 @@ def read_dataset(
logger.info('%s | %s | empty DataFrame: ', files[0], res_key)
continue

+    # delete zeros before first/after last non-zero value in each column
+    for col_name, col in df.iteritems():
+        nan_for_zero = col.replace(0, np.nan)
+        slicer = ((col.index <= nan_for_zero.first_valid_index()) |
+                  (col.index >= nan_for_zero.last_valid_index()))
+        col.loc[slicer] = np.nan
+
if cumulated[res_key].empty:
cumulated[res_key] = df
else:
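The diff above moves the zero-trimming out of read_opsd and into read_dataset, so it is applied to every parsed frame rather than only to the OPSD source. A simplified sketch of the stated intent on a toy series (values made up; note that the slicer in the diff uses <= and >=, so it also masks the first and last non-zero samples themselves):

import numpy as np
import pandas as pd

col = pd.Series([0.0, 0.0, 5.0, 0.0, 7.0, 0.0, 0.0])

# Zeros between the first and last non-zero observation are kept as real data;
# leading and trailing zeros are treated as 'not yet reported' and set to NaN.
nonzero = col.replace(0, np.nan)
first, last = nonzero.first_valid_index(), nonzero.last_valid_index()

trimmed = col.copy()
trimmed[(col.index < first) | (col.index > last)] = np.nan
print(trimmed.tolist())  # [nan, nan, 5.0, 0.0, 7.0, nan, nan]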
