Skip to content

Commit

Permalink
Merge pull request #348 from kperrynrel/filter-update
Browse files Browse the repository at this point in the history
added new private method for filtering on daily aggregated data
  • Loading branch information
mdeceglie authored Dec 7, 2022
2 parents 06908d3 + f8ac85f commit adf9f44
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 14 deletions.
2 changes: 1 addition & 1 deletion docs/sphinx/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
RdTools Change Log
==================
.. include:: changelog/pending.rst
.. include:: changelog/v2.2.0-beta.1.rst
.. include:: changelog/v2.1.4.rst
.. include:: changelog/v2.2.0-beta.0.rst
.. include:: changelog/v2.1.3.rst
Expand Down
7 changes: 7 additions & 0 deletions docs/sphinx/source/changelog/v2.2.0-beta.1.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
********************************
v2.2.0-beta.1 (December 7, 2022)
********************************

Enhancements
------------
* Added framework for running aggregated filters in ``analysis_chains.py`` (:pull:`348`)
108 changes: 99 additions & 9 deletions rdtools/analysis_chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,14 @@ class TrendAnalysis():
filter_params defaults to empty dicts for each function in rdtools.filtering,
in which case those functions use default parameter values, `ad_hoc_filter`
defaults to None. See examples for more information.
filter_params_aggregated: dict
parameters to be passed to rdtools.filtering functions that specifically handle
aggregated data (daily filters, etc.). Keys are the names of the rdtools.filtering functions.
Values are dicts of parameters to be passed to those functions. Also has a special key
`ad_hoc_filter`; this filter is a boolean mask joined with the rest of the filters.
filter_params_aggregated defaults to empty dicts for each function in rdtools.filtering,
in which case those functions use default parameter values, `ad_hoc_filter`
defaults to None. See examples for more information.
results : dict
Nested dict used to store the results of methods ending with `_analysis`
'''
Expand Down Expand Up @@ -133,6 +141,9 @@ def __init__(self, pv, poa_global=None, temperature_cell=None, temperature_ambie
'csi_filter': {},
'ad_hoc_filter': None # use this to include an explict filter
}
self.filter_params_aggregated = {
'ad_hoc_filter': None
}
# remove tcell_filter from list if power_expected is passed in
if power_expected is not None and temperature_cell is None:
del self.filter_params['tcell_filter']
Expand Down Expand Up @@ -252,7 +263,8 @@ def _calc_clearsky_poa(self, times=None, rescale=True, **kwargs):
clearsky_poa = clearsky_poa['poa_global']

if aggregate:
interval_id = pd.Series(range(len(self.poa_global)), index=self.poa_global.index)
interval_id = pd.Series(
range(len(self.poa_global)), index=self.poa_global.index)
interval_id = interval_id.reindex(times, method='backfill')
clearsky_poa = clearsky_poa.groupby(interval_id).mean()
clearsky_poa.index = self.poa_global.index
Expand Down Expand Up @@ -383,7 +395,8 @@ def _filter(self, energy_normalized, case):
self.filter_params, which is a dict, the keys of which are names of
functions in rdtools.filtering, and the values of which are dicts
containing the associated parameters with which to run the filtering
functions. See examples for details on how to modify filter parameters.
functions. This private method is specifically for the original indexed
data. See examples for details on how to modify filter parameters.
Parameters
----------
Expand All @@ -405,7 +418,8 @@ def _filter(self, energy_normalized, case):
# at once. However, we add a default value of True, with the same index as
# energy_normalized, so that the output is still correct even when all
# filters have been disabled.
filter_components = {'default': pd.Series(True, index=energy_normalized.index)}
filter_components = {'default': pd.Series(
True, index=energy_normalized.index)}

if case == 'sensor':
poa = self.poa_global
Expand Down Expand Up @@ -455,14 +469,16 @@ def _filter(self, energy_normalized, case):
ad_hoc_filter = self.filter_params['ad_hoc_filter']

if ad_hoc_filter.isnull().any():
warnings.warn('ad_hoc_filter contains NaN values; setting to False (excluding)')
warnings.warn(
'ad_hoc_filter contains NaN values; setting to False (excluding)')
ad_hoc_filter = ad_hoc_filter.fillna(False)

if not filter_components.index.equals(ad_hoc_filter.index):
warnings.warn('ad_hoc_filter index does not match index of other filters; missing '
'values will be set to True (kept). Align the index with the index '
'of the filter_components attribute to prevent this warning')
ad_hoc_filter = ad_hoc_filter.reindex(filter_components.index).fillna(True)
ad_hoc_filter = ad_hoc_filter.reindex(
filter_components.index).fillna(True)

filter_components['ad_hoc_filter'] = ad_hoc_filter

Expand All @@ -475,6 +491,63 @@ def _filter(self, energy_normalized, case):
self.clearsky_filter = bool_filter
self.clearsky_filter_components = filter_components

def _aggregated_filter(self, aggregated, case):
    """
    Aggregated-data counterpart of the ``_filter`` private method.
    Applies filters that operate on aggregated (e.g. daily) data, driven
    by self.filter_params_aggregated: its keys are names of functions in
    rdtools.filtering and its values are dicts of parameters for those
    functions. The special key `ad_hoc_filter` holds a boolean mask that
    is combined with the rest of the filters. See examples for details
    on how to modify filter parameters.

    Parameters
    ----------
    aggregated : pandas.Series
        Time series of aggregated normalized AC energy
    case : str
        'sensor' or 'clearsky' which filtering protocol to apply. Affects
        whether result is stored in self.sensor_filter_aggregated or
        self.clearsky_filter_aggregated)

    Returns
    -------
    None
    """
    # Seed with an all-True mask so the combined result is well defined
    # even when every aggregated filter is disabled.
    components = {'default': pd.Series(True, index=aggregated.index)}

    # Add daily aggregate filters as they come online here.

    # Collect the individual boolean masks into one DataFrame
    components = pd.DataFrame(components).fillna(False)

    # Apply the ad-hoc filter from filter_params_aggregated, if provided
    ad_hoc = self.filter_params_aggregated.get('ad_hoc_filter', None)
    if ad_hoc is not None:
        if ad_hoc.isnull().any():
            warnings.warn(
                'aggregated ad_hoc_filter contains NaN values; setting to False (excluding)')
            ad_hoc = ad_hoc.fillna(False)

        if not components.index.equals(ad_hoc.index):
            warnings.warn('Aggregated ad_hoc_filter index does not match index of other '
                          'filters; missing values will be set to True (kept). '
                          'Align the index with the index of the '
                          'filter_components_aggregated attribute to prevent this warning')
            ad_hoc = ad_hoc.reindex(components.index).fillna(True)

        components['ad_hoc_filter'] = ad_hoc

    # A point is kept only if every component filter keeps it
    overall = components.all(axis=1)
    # The placeholder column is internal only; drop it before storing
    components = components.drop(columns=['default'])
    if case == 'sensor':
        self.sensor_filter_aggregated = overall
        self.sensor_filter_components_aggregated = components
    elif case == 'clearsky':
        self.clearsky_filter_aggregated = overall
        self.clearsky_filter_components_aggregated = components

def _filter_check(self, post_filter):
'''
post-filter check for requisite 730 days of data
Expand Down Expand Up @@ -621,8 +694,16 @@ def _sensor_preprocess(self):
self._filter(energy_normalized, 'sensor')
aggregated, aggregated_insolation = self._aggregate(
energy_normalized[self.sensor_filter], insolation[self.sensor_filter])
self.sensor_aggregated_performance = aggregated
self.sensor_aggregated_insolation = aggregated_insolation
# Run daily filters on aggregated data
self._aggregated_filter(aggregated, 'sensor')
# Apply filter to aggregated data and store
self.sensor_aggregated_performance = aggregated[self.sensor_filter_aggregated]
self.sensor_aggregated_insolation = aggregated_insolation[self.sensor_filter_aggregated]
# Reindex the data after the fact, so it's on the aggregated interval
self.sensor_aggregated_performance = self.sensor_aggregated_performance.asfreq(
self.aggregation_freq)
self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.asfreq(
self.aggregation_freq)

def _clearsky_preprocess(self):
'''
Expand Down Expand Up @@ -651,8 +732,17 @@ def _clearsky_preprocess(self):
self._filter(cs_normalized, 'clearsky')
cs_aggregated, cs_aggregated_insolation = self._aggregate(
cs_normalized[self.clearsky_filter], cs_insolation[self.clearsky_filter])
self.clearsky_aggregated_performance = cs_aggregated
self.clearsky_aggregated_insolation = cs_aggregated_insolation
# Run daily filters on aggregated data
self._aggregated_filter(cs_aggregated, 'clearsky')
# Apply daily filter to aggregated data and store
self.clearsky_aggregated_performance = cs_aggregated[self.clearsky_filter_aggregated]
self.clearsky_aggregated_insolation = \
cs_aggregated_insolation[self.clearsky_filter_aggregated]
# Reindex the data after the fact, so it's on the aggregated interval
self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.asfreq(
self.aggregation_freq)
self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.asfreq(
self.aggregation_freq)

def sensor_analysis(self, analyses=['yoy_degradation'], yoy_kwargs={}, srr_kwargs={}):
'''
Expand Down
96 changes: 92 additions & 4 deletions rdtools/test/analysis_chains_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,39 @@ def test_sensor_analysis_ad_hoc_filter(sensor_parameters):
rd_analysis.sensor_analysis(analyses=['yoy_degradation'])


def test_sensor_analysis_aggregated_ad_hoc_filter(sensor_parameters):
    # by excluding all but a few points, we should trigger the <2yr error
    daily_mask = pd.Series(False, index=sensor_parameters['pv'].index)
    daily_mask = daily_mask.resample('1D').first().dropna(how='all')
    daily_mask.iloc[-500:] = True
    analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
    analysis.filter_params_aggregated['ad_hoc_filter'] = daily_mask
    expected_message = "Less than two years of data left after filtering"
    with pytest.raises(ValueError, match=expected_message):
        analysis.sensor_analysis(analyses=['yoy_degradation'])


def test_filter_components(sensor_parameters):
    # the default poa_filter keeps points with 200 < POA < 1200
    poa = sensor_parameters['poa_global']
    expected_poa_filter = (poa > 200) & (poa < 1200)
    analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
    analysis.sensor_analysis(analyses=['yoy_degradation'])
    stored_poa_filter = analysis.sensor_filter_components['poa_filter']
    assert (expected_poa_filter == stored_poa_filter).all()


def test_aggregated_filter_components(sensor_parameters):
    # build a daily ad-hoc mask excluding the first 600 sub-daily points
    mask = pd.Series(True, index=sensor_parameters['pv'].index)
    mask[:600] = False
    mask = mask.resample('1D').first().dropna(how='all')
    analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
    analysis.filter_params = {}  # disable all index-based filters
    analysis.filter_params_aggregated['ad_hoc_filter'] = mask
    analysis.sensor_analysis(analyses=['yoy_degradation'])
    # the mask should be stored verbatim as the ad_hoc_filter component
    stored = analysis.sensor_filter_components_aggregated['ad_hoc_filter']
    assert (mask == stored).all()


def test_filter_components_no_filters(sensor_parameters):
Expand All @@ -196,12 +223,23 @@ def test_filter_components_no_filters(sensor_parameters):
assert rd_analysis.sensor_filter_components.empty


def test_aggregated_filter_components_no_filters(sensor_parameters):
    """With all index-based and daily filters disabled, the aggregated
    filter should keep every daily point and the aggregated component
    table should be empty."""
    rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
    rd_analysis.filter_params = {}  # disable all index-based filters
    rd_analysis.filter_params_aggregated = {}  # disable all daily filters
    rd_analysis.sensor_analysis(analyses=['yoy_degradation'])
    expected = pd.Series(True, index=rd_analysis.pv_energy.index)
    daily_expected = expected.resample('1D').first().dropna(how='all')
    pd.testing.assert_series_equal(rd_analysis.sensor_filter_aggregated,
                                   daily_expected)
    # Fix: this test is about the *aggregated* components, so check the
    # aggregated component table (the original asserted the index-based
    # table, duplicating test_filter_components_no_filters)
    assert rd_analysis.sensor_filter_components_aggregated.empty
    assert rd_analysis.sensor_filter_components.empty


@pytest.mark.parametrize('workflow', ['sensor', 'clearsky'])
def test_filter_ad_hoc_warnings(workflow, sensor_parameters):
rd_analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
rd_analysis.set_clearsky(pvlib_location=pvlib.location.Location(40, -80),
poa_global_clearsky=rd_analysis.poa_global)

# warning for incomplete index
ad_hoc_filter = pd.Series(True, index=sensor_parameters['pv'].index[:-5])
rd_analysis.filter_params['ad_hoc_filter'] = ad_hoc_filter
Expand Down Expand Up @@ -233,6 +271,54 @@ def test_filter_ad_hoc_warnings(workflow, sensor_parameters):
assert components.drop(components.index[10])['ad_hoc_filter'].all()


@pytest.mark.parametrize('workflow', ['sensor', 'clearsky'])
def test_aggregated_filter_ad_hoc_warnings(workflow, sensor_parameters):
    def build_analysis():
        # fresh TrendAnalysis with clearsky configured and all
        # index-based filters outside of CSI disabled
        analysis = TrendAnalysis(**sensor_parameters, power_dc_rated=1.0)
        analysis.set_clearsky(pvlib_location=pvlib.location.Location(40, -80),
                              poa_global_clearsky=analysis.poa_global)
        analysis.filter_params = {'csi_filter': {}}
        return analysis

    def run_and_get_components(analysis):
        # run the requested workflow and return its aggregated components
        if workflow == 'sensor':
            analysis.sensor_analysis(analyses=['yoy_degradation'])
            return analysis.sensor_filter_components_aggregated
        analysis.clearsky_analysis(analyses=['yoy_degradation'])
        return analysis.clearsky_filter_components_aggregated

    # warning for incomplete index
    rd_analysis = build_analysis()
    daily_ad_hoc_filter = pd.Series(True,
                                    index=sensor_parameters['pv'].index[:-5])
    daily_ad_hoc_filter = daily_ad_hoc_filter.resample(
        '1D').first().dropna(how='all')
    rd_analysis.filter_params_aggregated['ad_hoc_filter'] = daily_ad_hoc_filter
    with pytest.warns(UserWarning, match='ad_hoc_filter index does not match index'):
        components = run_and_get_components(rd_analysis)

    # missing values set to True
    assert components['ad_hoc_filter'].all()

    # warning about NaNs
    rd_analysis_2 = build_analysis()
    daily_ad_hoc_filter = pd.Series(True, index=sensor_parameters['pv'].index)
    daily_ad_hoc_filter = daily_ad_hoc_filter.resample(
        '1D').first().dropna(how='all')
    daily_ad_hoc_filter.iloc[10] = np.nan
    rd_analysis_2.filter_params_aggregated['ad_hoc_filter'] = daily_ad_hoc_filter
    with pytest.warns(UserWarning, match='ad_hoc_filter contains NaN values; setting to False'):
        components = run_and_get_components(rd_analysis_2)

    # NaN values set to False
    assert not components['ad_hoc_filter'].iloc[10]
    assert components.drop(components.index[10])['ad_hoc_filter'].all()


def test_cell_temperature_model_invalid(sensor_parameters):
wind = pd.Series(0, index=sensor_parameters['pv'].index)
sensor_parameters.pop('temperature_model')
Expand Down Expand Up @@ -351,8 +437,10 @@ def test_index_mismatch():
# GH #277
times = pd.date_range('2019-01-01', '2022-01-01', freq='15min')
pv = pd.Series(1.0, index=times)
dummy_series = pd.Series(1.0, index=times[::4]) # low-frequency weather inputs
keys = ['poa_global', 'temperature_cell', 'temperature_ambient', 'power_expected', 'windspeed']
# low-frequency weather inputs
dummy_series = pd.Series(1.0, index=times[::4])
keys = ['poa_global', 'temperature_cell',
'temperature_ambient', 'power_expected', 'windspeed']
kwargs = {key: dummy_series.copy() for key in keys}
rd_analysis = TrendAnalysis(pv, **kwargs)
for key in keys:
Expand Down

0 comments on commit adf9f44

Please sign in to comment.