"""Tests for ``wind_up.ops_curve_shift``."""

import itertools
import logging
from unittest.mock import Mock, patch

import numpy as np
import pandas as pd
import pytest

from wind_up.ops_curve_shift import (
    CurveConfig,
    CurveShiftInput,
    CurveThresholds,
    CurveTypes,
    calculate_pitch_curve_shift,
    calculate_power_curve_shift,
    calculate_rpm_curve_shift,
    check_for_ops_curve_shift,
)

WIND_SPEEDS = list(range(15))
SHIFT_WARNING_TEXT = "Ops Curve Shift warning"


def _curve_df(y_col: str, y_values: list[int]) -> pd.DataFrame:
    """Build a fake curve with a ``wind_speed`` column, indexed by ``y_col``.

    Indexing by the y column means ``df + shift`` moves only ``wind_speed``;
    ``reset_index()`` then restores ``y_col`` as an ordinary column.
    """
    return pd.DataFrame({"wind_speed": WIND_SPEEDS, y_col: y_values}).set_index(y_col)


def _power_curve_config() -> CurveConfig:
    """Return the power-curve configuration shared by the input-validation tests."""
    return CurveConfig(
        name=CurveTypes.POWER_CURVE.value,
        x_col="wind_speed",
        y_col="power",
        x_bin_width=1,
        warning_threshold=0.01,
    )


def _assert_warning_logged_iff_exceeded(caplog: pytest.LogCaptureFixture, shift: float, threshold: float) -> None:
    """Check the shift warning is present above the threshold and absent otherwise."""
    if abs(shift) > threshold:
        assert SHIFT_WARNING_TEXT in caplog.text
    else:
        assert SHIFT_WARNING_TEXT not in caplog.text


def _assert_all_shifts_are_nan(actual: dict[str, float]) -> None:
    """Check the result has one ``<curve>_shift`` key per curve type, every value NaN.

    NaN never compares equal to itself, so the values are checked with
    ``np.isnan`` rather than relying on dict equality.
    """
    assert set(actual) == {f"{curve_type.value}_shift" for curve_type in CurveTypes}
    assert all(np.isnan(value) for value in actual.values())


@pytest.fixture
def fake_power_curve_df() -> pd.DataFrame:
    return _curve_df("power", [0, 0, 0, 1, 3, 6, 10, 15, 22, 30, 36, 39, 40, 40, 40])


@pytest.fixture
def fake_gen_rpm_curve_df() -> pd.DataFrame:
    return _curve_df(
        "gen_rpm", [900, 900, 850, 875, 900, 1000, 1100, 1200, 1350, 1500, 1600, 1600, 1600, 1600, 1600]
    )


@pytest.fixture
def fake_pitch_curve_df() -> pd.DataFrame:
    return _curve_df("pitch", [4, 4, 4, 3, 2, 1, 1, 1, 2, 5, 8, 11, 13, 14, 15])


@pytest.fixture
def fake_ops_df(
    fake_power_curve_df: pd.DataFrame, fake_gen_rpm_curve_df: pd.DataFrame, fake_pitch_curve_df: pd.DataFrame
) -> pd.DataFrame:
    """One frame holding wind_speed, power, gen_rpm and pitch columns."""
    curves = (fake_power_curve_df, fake_gen_rpm_curve_df, fake_pitch_curve_df)
    return pd.concat([curve.reset_index().set_index("wind_speed") for curve in curves], axis=1).reset_index()


class TestCurveShiftInput:
    @staticmethod
    def test_acceptable_inputs(fake_power_curve_df: pd.DataFrame) -> None:
        _input = CurveShiftInput(
            turbine_name="anything",
            pre_df=fake_power_curve_df.reset_index(),
            post_df=fake_power_curve_df.reset_index(),
            curve_config=_power_curve_config(),
        )

    @pytest.mark.parametrize("column_name", ["wind_speed", "power"])
    def test_missing_column_in_pre_df(self, column_name: str, fake_power_curve_df: pd.DataFrame) -> None:
        with pytest.raises(IndexError, match="Column name missing in dataframe"):
            CurveShiftInput(
                turbine_name="anything",
                pre_df=fake_power_curve_df.reset_index().drop(columns=column_name),
                post_df=(fake_power_curve_df + 2).reset_index(),
                curve_config=_power_curve_config(),
            )

    @pytest.mark.parametrize("column_name", ["wind_speed", "power"])
    def test_missing_column_in_post_df(self, column_name: str, fake_power_curve_df: pd.DataFrame) -> None:
        with pytest.raises(IndexError, match="Column name missing in dataframe"):
            CurveShiftInput(
                turbine_name="anything",
                pre_df=fake_power_curve_df.reset_index(),
                post_df=(fake_power_curve_df + 2).reset_index().drop(columns=column_name),
                curve_config=_power_curve_config(),
            )


@pytest.mark.parametrize(
    ("shift_amount", "expected"),
    [
        pytest.param(2.0, -0.22099447513812154, id="shift DOES exceed threshold"),
        pytest.param(0.05, -0.007042253521126751, id="shift DOES NOT exceed threshold"),
    ],
)
def test_calculate_power_curve_shift(
    shift_amount: float, expected: float, fake_power_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
) -> None:
    with caplog.at_level(logging.WARNING):
        actual = calculate_power_curve_shift(
            turbine_name="anything",
            pre_df=fake_power_curve_df.reset_index(),
            post_df=(fake_power_curve_df + shift_amount).reset_index(),
            x_col="wind_speed",
            y_col="power",
        )

    _assert_warning_logged_iff_exceeded(caplog, expected, CurveThresholds.POWER_CURVE.value)
    np.testing.assert_almost_equal(actual=actual, desired=expected)


@pytest.mark.parametrize(
    ("shift_amount", "expected"),
    [
        pytest.param(0.2, -0.00712694877505593, id="shift DOES exceed threshold"),
        pytest.param(0.1, -0.0033534540576795058, id="shift DOES NOT exceed threshold"),
    ],
)
def test_calculate_rpm_curve_shift(
    shift_amount: float, expected: float, fake_gen_rpm_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
) -> None:
    with caplog.at_level(logging.WARNING):
        actual = calculate_rpm_curve_shift(
            turbine_name="anything",
            pre_df=fake_gen_rpm_curve_df.reset_index(),
            post_df=(fake_gen_rpm_curve_df + shift_amount).reset_index(),
            x_col="wind_speed",
            y_col="gen_rpm",
        )

    _assert_warning_logged_iff_exceeded(caplog, expected, CurveThresholds.RPM.value)
    np.testing.assert_almost_equal(actual=actual, desired=expected)


@pytest.mark.parametrize(
    ("shift_amount", "expected"),
    [
        pytest.param(0.14, -0.1026666666666678, id="shift DOES exceed threshold"),
        pytest.param(0.13, -0.09533333333333438, id="shift DOES NOT exceed threshold"),
    ],
)
def test_calculate_pitch_curve_shift(
    shift_amount: float, expected: float, fake_pitch_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
) -> None:
    with caplog.at_level(logging.WARNING):
        actual = calculate_pitch_curve_shift(
            turbine_name="anything",
            pre_df=fake_pitch_curve_df.reset_index(),
            post_df=(fake_pitch_curve_df + shift_amount).reset_index(),
            x_col="wind_speed",
            y_col="pitch",
        )

    _assert_warning_logged_iff_exceeded(caplog, expected, CurveThresholds.PITCH.value)
    np.testing.assert_almost_equal(actual=actual, desired=expected)


class TestCheckForOpsCurveShift:
    @pytest.mark.parametrize(
        ("pre_df_or_post_df", "missing_column"),
        list(itertools.product(["pre", "post"], ["wind_speed", "power", "gen_rpm", "pitch"])),
    )
    def test_missing_required_column(
        self,
        pre_df_or_post_df: str,
        missing_column: str,
        fake_ops_df: pd.DataFrame,
    ) -> None:
        incomplete_df = fake_ops_df.drop(columns=missing_column)
        pre_df = incomplete_df if pre_df_or_post_df == "pre" else fake_ops_df
        post_df = incomplete_df if pre_df_or_post_df == "post" else fake_ops_df

        actual = check_for_ops_curve_shift(
            pre_df=pre_df,
            post_df=post_df,
            wtg_name="anything",
            scada_ws_col="wind_speed",
            pw_col="power",
            rpm_col="gen_rpm",
            pt_col="pitch",
            cfg=Mock(),
            plot_cfg=Mock(),
            plot=False,
        )

        _assert_all_shifts_are_nan(actual)

    def test_calls_funcs_as_intended(self, fake_ops_df: pd.DataFrame) -> None:
        _df = fake_ops_df
        wtg_name = "anything"

        with (
            patch("wind_up.ops_curve_shift.calculate_power_curve_shift", return_value=np.nan) as mock_power,
            patch("wind_up.ops_curve_shift.calculate_rpm_curve_shift", return_value=np.nan) as mock_rpm,
            patch("wind_up.ops_curve_shift.calculate_pitch_curve_shift", return_value=np.nan) as mock_pitch,
            patch("wind_up.ops_curve_shift.compare_ops_curves_pre_post", return_value=None) as mock_plot_func,
        ):
            mock_wind_up_conf = Mock()
            mock_wind_up_conf.toggle = True
            mock_plot_conf = Mock()

            actual = check_for_ops_curve_shift(
                pre_df=_df,
                post_df=_df,
                wtg_name=wtg_name,
                scada_ws_col="wind_speed",
                pw_col="power",
                rpm_col="gen_rpm",
                pt_col="pitch",
                cfg=mock_wind_up_conf,
                plot_cfg=mock_plot_conf,
            )

        mock_power.assert_called_once_with(
            turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="power"
        )

        # the rpm curve is evaluated against power, not wind speed
        mock_rpm.assert_called_once_with(turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="power", y_col="gen_rpm")

        mock_pitch.assert_called_once_with(
            turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="pitch"
        )

        mock_plot_func.assert_called_once_with(
            pre_df=_df,
            post_df=_df,
            wtg_name=wtg_name,
            ws_col="wind_speed",
            pw_col="power",
            pt_col="pitch",
            rpm_col="gen_rpm",
            plot_cfg=mock_plot_conf,
            is_toggle_test=mock_wind_up_conf.toggle is not None,
            sub_dir=None,
        )

        _assert_all_shifts_are_nan(actual)
check_for_ops_curve_shift( - pre_df: pd.DataFrame, - post_df: pd.DataFrame, - *, - wtg_name: str, - scada_ws_col: str, - pw_col: str, - rpm_col: str, - pt_col: str, - cfg: WindUpConfig, - plot_cfg: PlotConfig, - sub_dir: str | None = None, -) -> dict[str, float]: - results_dict = { - "powercurve_shift": np.nan, - "rpm_shift": np.nan, - "pitch_shift": np.nan, - } - # check if all required columns are present - required_cols = [scada_ws_col, pw_col, pt_col, rpm_col] - for req_col in required_cols: - if req_col not in pre_df.columns: - msg = f"check_for_ops_curve_shift {wtg_name} pre_df missing required column {req_col}" - result_manager.warning(msg) - return results_dict - if req_col not in post_df.columns: - msg = f"check_for_ops_curve_shift {wtg_name} post_df missing required column {req_col}" - result_manager.warning(msg) - return results_dict - pre_dropna_df = pre_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy() - post_dropna_df = post_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy() - - warning_msg: str | None = None - for descr, x_var, y_var, x_bin_width, warn_thresh in [ - ("powercurve_shift", scada_ws_col, pw_col, 1, 0.01), - ("rpm_shift", pw_col, rpm_col, 0, 0.005), - ("pitch_shift", scada_ws_col, pt_col, 1, 0.1), - ]: - bins = np.arange(0, pre_dropna_df[x_var].max() + x_bin_width, x_bin_width) if x_bin_width > 0 else 10 - mean_curve = pre_dropna_df.groupby(pd.cut(pre_dropna_df[x_var], bins=bins, retbins=False), observed=True).agg( - x_mean=pd.NamedAgg(column=x_var, aggfunc="mean"), - y_mean=pd.NamedAgg(column=y_var, aggfunc="mean"), - ) - post_dropna_df["expected_y"] = np.interp(post_dropna_df[x_var], mean_curve["x_mean"], mean_curve["y_mean"]) - mean_df = post_dropna_df.mean() - if y_var == pt_col: - results_dict[descr] = mean_df[y_var] - mean_df["expected_y"] - else: - results_dict[descr] = (mean_df[y_var] / mean_df["expected_y"] - 1).clip(-1, 1) - if abs(results_dict[descr]) > warn_thresh: - if warning_msg is None: - 
warning_msg = f"{wtg_name} check_for_ops_curve_shift warnings:" - warning_msg += f" abs({descr}) > {warn_thresh}: {abs(results_dict[descr]):.3f}" - if warning_msg is not None: - result_manager.warning(warning_msg) - - compare_ops_curves_pre_post( - pre_df=pre_df, - post_df=post_df, - wtg_name=wtg_name, - ws_col=scada_ws_col, - pw_col=pw_col, - pt_col=pt_col, - rpm_col=rpm_col, - plot_cfg=plot_cfg, - is_toggle_test=(cfg.toggle is not None), - sub_dir=sub_dir, - ) - - return results_dict - - def calc_test_ref_results( *, test_df: pd.DataFrame, diff --git a/wind_up/ops_curve_shift.py b/wind_up/ops_curve_shift.py new file mode 100644 index 0000000..5694b7a --- /dev/null +++ b/wind_up/ops_curve_shift.py @@ -0,0 +1,218 @@ +from __future__ import annotations + +from enum import Enum +from typing import TYPE_CHECKING, NamedTuple + +import numpy as np +import pandas as pd +from pydantic import BaseModel, ConfigDict, model_validator + +from wind_up.plots.scada_funcs_plots import compare_ops_curves_pre_post +from wind_up.result_manager import result_manager + +if TYPE_CHECKING: + from wind_up.models import PlotConfig, WindUpConfig + + +class CurveThresholds(Enum): + POWER_CURVE = 0.01 + RPM = 0.005 + PITCH = 0.1 + + +class CurveTypes(str, Enum): + POWER_CURVE = "powercurve" + RPM = "rpm" + PITCH = "pitch" + + +class CurveConfig(BaseModel): + name: CurveTypes + x_col: str + y_col: str + x_bin_width: int + warning_threshold: float + + +class CurveShiftInput(BaseModel): + turbine_name: str + pre_df: pd.DataFrame + post_df: pd.DataFrame + curve_config: CurveConfig + model_config = ConfigDict(arbitrary_types_allowed=True) + + @model_validator(mode="after") + def validate_dataframes(self) -> CurveShiftInput: + # check column names + required_cols = {self.curve_config.x_col, self.curve_config.y_col} + columns_missing_in_pre_df = required_cols - set(self.pre_df.columns) + columns_missing_in_post_df = required_cols - set(self.post_df.columns) + if columns_missing_in_pre_df or 
columns_missing_in_post_df: + err_msg = "Column name missing in dataframe" + raise IndexError(err_msg) + + # remove NA + self.pre_df = self.pre_df.dropna(subset=list(required_cols)).copy() + self.post_df = self.post_df.dropna(subset=list(required_cols)).copy() + + return self + + +class OpsCurveRequiredColumns(NamedTuple): + wind_speed: str + power: str + pitch: str + rpm: str + + +def check_for_ops_curve_shift( + pre_df: pd.DataFrame, + post_df: pd.DataFrame, + *, + wtg_name: str, + scada_ws_col: str, + pw_col: str, + rpm_col: str, + pt_col: str, + cfg: WindUpConfig, + plot_cfg: PlotConfig, + sub_dir: str | None = None, + plot: bool = True, +) -> dict[str, float]: + results_dict = { + f"{CurveTypes.POWER_CURVE.value}_shift": np.nan, + f"{CurveTypes.RPM.value}_shift": np.nan, + f"{CurveTypes.PITCH.value}_shift": np.nan, + } + + required_cols = OpsCurveRequiredColumns(wind_speed=scada_ws_col, power=pw_col, pitch=pt_col, rpm=rpm_col) + + if not _required_cols_are_present( + pre_df=pre_df, post_df=post_df, turbine_name=wtg_name, required_ops_curve_columns=required_cols + ): + return results_dict + + results_dict[f"{CurveTypes.POWER_CURVE.value}_shift"] = calculate_power_curve_shift( + turbine_name=wtg_name, pre_df=pre_df, post_df=post_df, x_col=scada_ws_col, y_col=pw_col + ) + + results_dict[f"{CurveTypes.RPM.value}_shift"] = calculate_rpm_curve_shift( + turbine_name=wtg_name, pre_df=pre_df, post_df=post_df, x_col=pw_col, y_col=rpm_col + ) + + results_dict[f"{CurveTypes.PITCH.value}_shift"] = calculate_pitch_curve_shift( + turbine_name=wtg_name, pre_df=pre_df, post_df=post_df, x_col=scada_ws_col, y_col=pt_col + ) + + if plot: + compare_ops_curves_pre_post( + pre_df=pre_df, + post_df=post_df, + wtg_name=wtg_name, + ws_col=scada_ws_col, + pw_col=pw_col, + pt_col=pt_col, + rpm_col=rpm_col, + plot_cfg=plot_cfg, + is_toggle_test=(cfg.toggle is not None), + sub_dir=sub_dir, + ) + + return results_dict + + +def calculate_power_curve_shift( + turbine_name: str, pre_df: 
pd.DataFrame, post_df: pd.DataFrame, x_col: str, y_col: str +) -> float: + curve_config = CurveConfig( + name=CurveTypes.POWER_CURVE.value, + x_col=x_col, + y_col=y_col, + x_bin_width=1, + warning_threshold=CurveThresholds.POWER_CURVE.value, + ) + + curve_shift_input = CurveShiftInput( + turbine_name=turbine_name, pre_df=pre_df, post_df=post_df, curve_config=curve_config + ) + + return _calculate_curve_shift(curve_shift_input=curve_shift_input) + + +def calculate_rpm_curve_shift( + turbine_name: str, pre_df: pd.DataFrame, post_df: pd.DataFrame, x_col: str, y_col: str +) -> float: + curve_config = CurveConfig( + name=CurveTypes.RPM.value, x_col=x_col, y_col=y_col, x_bin_width=0, warning_threshold=CurveThresholds.RPM.value + ) + + curve_shift_input = CurveShiftInput( + turbine_name=turbine_name, pre_df=pre_df, post_df=post_df, curve_config=curve_config + ) + + return _calculate_curve_shift(curve_shift_input=curve_shift_input) + + +def calculate_pitch_curve_shift( + turbine_name: str, pre_df: pd.DataFrame, post_df: pd.DataFrame, x_col: str, y_col: str +) -> float: + curve_config = CurveConfig( + name=CurveTypes.PITCH.value, + x_col=x_col, + y_col=y_col, + x_bin_width=1, + warning_threshold=CurveThresholds.PITCH.value, + ) + + curve_shift_input = CurveShiftInput( + turbine_name=turbine_name, pre_df=pre_df, post_df=post_df, curve_config=curve_config + ) + + return _calculate_curve_shift(curve_shift_input=curve_shift_input) + + +def _required_cols_are_present( + pre_df: pd.DataFrame, post_df: pd.DataFrame, turbine_name: str, required_ops_curve_columns: OpsCurveRequiredColumns +) -> bool: + # check if all required columns are present + required_cols = list(required_ops_curve_columns) + for req_col in required_cols: + if req_col not in pre_df.columns: + msg = f"check_for_ops_curve_shift {turbine_name} pre_df missing required column {req_col}" + result_manager.warning(msg) + return False + if req_col not in post_df.columns: + msg = f"check_for_ops_curve_shift {turbine_name} 
post_df missing required column {req_col}" + result_manager.warning(msg) + return False + return True + + +def _calculate_curve_shift(curve_shift_input: CurveShiftInput) -> float: + conf = curve_shift_input.curve_config + pre_df = curve_shift_input.pre_df + post_df = curve_shift_input.post_df + wtg_name = curve_shift_input.turbine_name + + bins = np.arange(0, pre_df[conf.x_col].max() + conf.x_bin_width, conf.x_bin_width) if conf.x_bin_width > 0 else 10 + + mean_curve = pre_df.groupby(pd.cut(pre_df[conf.x_col], bins=bins, retbins=False), observed=True).agg( + x_mean=pd.NamedAgg(column=conf.x_col, aggfunc="mean"), + y_mean=pd.NamedAgg(column=conf.y_col, aggfunc="mean"), + ) + post_df["expected_y"] = np.interp(post_df[conf.x_col], mean_curve["x_mean"], mean_curve["y_mean"]) + mean_df = post_df.mean() + + if conf.y_col == CurveTypes.PITCH.value: + result = mean_df[conf.y_col] - mean_df["expected_y"] + else: + result = (mean_df[conf.y_col] / mean_df["expected_y"] - 1).clip(-1, 1) + + # log warning + if abs(result) > conf.warning_threshold: + warning_msg = ( + f"{wtg_name} Ops Curve Shift warning: abs({conf.name}) > {conf.warning_threshold}: {abs(result):.3f}" + ) + result_manager.warning(warning_msg) + + return result