refactor: move ops curve shift calcs and reduce complexity of functions

- Moves the operational curve shift calculations into their own module.
- Refactors them into separate curve-specific functions (built on more generic private functions).

This change improves readability and makes it easier to add further curve shift calculations.
resgroup · Oct 8, 2024 · 5121f08 · 5121f08
1 parent b58aa58
commit 5121f08
Show file tree

Hide file tree

Showing 4 changed files with 518 additions and 74 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -132,7 +132,10 @@ filterwarnings = [
 omit = [
     "wind_up/plots/*.py",
 ]
-exclude_lines = ["if __name__ == .__main__.:"]
+exclude_lines = [
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:"
+]
 
 [tool.poe.tasks]
 [tool.poe.tasks.lint]

diff --git a/tests/test_ops_curve_shift.py b/tests/test_ops_curve_shift.py
@@ -0,0 +1,294 @@
+import logging
+from unittest.mock import Mock, patch
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from wind_up.ops_curve_shift import (
+    CurveConfig,
+    CurveShiftInput,
+    CurveThresholds,
+    CurveTypes,
+    calculate_pitch_curve_shift,
+    calculate_power_curve_shift,
+    calculate_rpm_curve_shift,
+    check_for_ops_curve_shift,
+)
+
+
+@pytest.fixture
+def fake_power_curve_df() -> pd.DataFrame:  # synthetic 15-row wind_speed/power table used as a toy power curve
+    return pd.DataFrame(
+        {
+            "wind_speed": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+            "power": [0, 0, 0, 1, 3, 6, 10, 15, 22, 30, 36, 39, 40, 40, 40],
+        }
+    ).set_index("power")  # power is the index, so `df + shift` in the tests offsets only wind_speed
+
+
+@pytest.fixture
+def fake_gen_rpm_curve_df() -> pd.DataFrame:  # synthetic 15-row wind_speed/gen_rpm table
+    return pd.DataFrame(
+        {
+            "wind_speed": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+            "gen_rpm": [900, 900, 850, 875, 900, 1000, 1100, 1200, 1350, 1500, 1600, 1600, 1600, 1600, 1600],
+        }
+    ).set_index("gen_rpm")  # gen_rpm is the index, so `df + shift` in the tests offsets only wind_speed
+
+
+@pytest.fixture
+def fake_pitch_curve_df() -> pd.DataFrame:  # synthetic 15-row wind_speed/pitch table
+    return pd.DataFrame(
+        {
+            "wind_speed": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
+            "pitch": [4, 4, 4, 3, 2, 1, 1, 1, 2, 5, 8, 11, 13, 14, 15],
+        }
+    ).set_index("pitch")  # pitch is the index, so `df + shift` in the tests offsets only wind_speed
+
+
+class TestCurveShiftInput:  # construction checks for CurveShiftInput
+    @staticmethod
+    def test_acceptable_inputs(fake_power_curve_df: pd.DataFrame) -> None:  # passes if nothing raises
+        _input = CurveShiftInput(  # result is unused; only the constructor call is exercised
+            turbine_name="anything",
+            pre_df=fake_power_curve_df.reset_index(),  # reset_index restores power as a regular column
+            post_df=fake_power_curve_df.reset_index(),
+            curve_config=CurveConfig(
+                name=CurveTypes.POWER_CURVE.value,
+                x_col="wind_speed",
+                y_col="power",
+                x_bin_width=1,
+                warning_threshold=0.01,
+            ),
+        )
+
+    @pytest.mark.parametrize("column_name", ["wind_speed", "power"])  # the configured x_col and y_col
+    def test_missing_column_in_pre_df(self, column_name: str, fake_power_curve_df: pd.DataFrame) -> None:
+        with pytest.raises(IndexError, match="Column name missing in dataframe"):  # x_col or y_col absent
+            CurveShiftInput(
+                turbine_name="anything",
+                pre_df=fake_power_curve_df.reset_index().drop(columns=column_name),  # remove one configured column
+                post_df=(fake_power_curve_df + 2).reset_index(),  # +2 offsets wind_speed only
+                curve_config=CurveConfig(
+                    name=CurveTypes.POWER_CURVE.value,
+                    x_col="wind_speed",
+                    y_col="power",
+                    x_bin_width=1,
+                    warning_threshold=0.01,
+                ),
+            )
+
+    @pytest.mark.parametrize("column_name", ["wind_speed", "power"])  # the configured x_col and y_col
+    def test_missing_column_in_post_df(self, column_name: str, fake_power_curve_df: pd.DataFrame) -> None:
+        with pytest.raises(IndexError, match="Column name missing in dataframe"):  # x_col or y_col absent
+            CurveShiftInput(
+                turbine_name="anything",
+                pre_df=fake_power_curve_df.reset_index(),  # pre_df is complete; only post_df lacks a column
+                post_df=(fake_power_curve_df + 2).reset_index().drop(columns=column_name),
+                curve_config=CurveConfig(
+                    name=CurveTypes.POWER_CURVE.value,
+                    x_col="wind_speed",
+                    y_col="power",
+                    x_bin_width=1,
+                    warning_threshold=0.01,
+                ),
+            )
+
+
+@pytest.mark.parametrize(
+    ("shift_amount", "expected"),  # offset added to post_df wind_speed, and the value the function should return
+    [
+        pytest.param(2.0, -0.22099447513812154, id="shift DOES exceed threshold"),
+        pytest.param(0.05, -0.007042253521126751, id="shift DOES NOT exceed threshold"),
+    ],
+)
+def test_calculate_power_curve_shift(
+    shift_amount: float, expected: float, fake_power_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
+) -> None:
+    with caplog.at_level(logging.WARNING):  # capture WARNING records so the threshold message can be checked
+        actual = calculate_power_curve_shift(
+            turbine_name="anything",
+            pre_df=fake_power_curve_df.reset_index(),
+            post_df=(fake_power_curve_df + shift_amount).reset_index(),  # offsets wind_speed only
+            x_col="wind_speed",
+            y_col="power",
+        )
+
+    if abs(expected) > CurveThresholds.POWER_CURVE.value:  # NOTE(review): no 'warning absent' check otherwise
+        assert "Ops Curve Shift warning" in caplog.text
+
+    np.testing.assert_almost_equal(actual=actual, desired=expected)  # default tolerance of assert_almost_equal
+
+
+@pytest.mark.parametrize(
+    ("shift_amount", "expected"),  # offset added to post_df wind_speed, and the value the function should return
+    [
+        pytest.param(0.2, -0.00712694877505593, id="shift DOES exceed threshold"),
+        pytest.param(0.1, -0.0033534540576795058, id="shift DOES NOT exceed threshold"),
+    ],
+)
+def test_calculate_rpm_curve_shift(
+    shift_amount: float, expected: float, fake_gen_rpm_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
+) -> None:
+    with caplog.at_level(logging.WARNING):  # capture WARNING records so the threshold message can be checked
+        actual = calculate_rpm_curve_shift(
+            turbine_name="anything",
+            pre_df=fake_gen_rpm_curve_df.reset_index(),
+            post_df=(fake_gen_rpm_curve_df + shift_amount).reset_index(),  # offsets wind_speed only
+            x_col="wind_speed",  # this direct call uses wind_speed as x
+            y_col="gen_rpm",
+        )
+
+    if abs(expected) > CurveThresholds.RPM.value:  # NOTE(review): no 'warning absent' check otherwise
+        assert "Ops Curve Shift warning" in caplog.text
+
+    np.testing.assert_almost_equal(actual=actual, desired=expected)  # default tolerance of assert_almost_equal
+
+
+@pytest.mark.parametrize(
+    ("shift_amount", "expected"),  # offset added to post_df wind_speed, and the value the function should return
+    [
+        pytest.param(0.14, -0.1026666666666678, id="shift DOES exceed threshold"),
+        pytest.param(0.13, -0.09533333333333438, id="shift DOES NOT exceed threshold"),
+    ],
+)
+def test_calculate_pitch_curve_shift(
+    shift_amount: float, expected: float, fake_pitch_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
+) -> None:
+    with caplog.at_level(logging.WARNING):  # capture WARNING records so the threshold message can be checked
+        actual = calculate_pitch_curve_shift(
+            turbine_name="anything",
+            pre_df=fake_pitch_curve_df.reset_index(),
+            post_df=(fake_pitch_curve_df + shift_amount).reset_index(),  # offsets wind_speed only
+            x_col="wind_speed",
+            y_col="pitch",
+        )
+
+    if abs(expected) > CurveThresholds.PITCH.value:  # NOTE(review): no 'warning absent' check otherwise
+        assert "Ops Curve Shift warning" in caplog.text
+
+    np.testing.assert_almost_equal(actual=actual, desired=expected)  # default tolerance of assert_almost_equal
+
+
+class TestCheckForOpsCurveShift:  # tests for the check_for_ops_curve_shift entry point
+    @pytest.mark.parametrize(
+        ("pre_df_or_post_df", "missing_column"),  # which frame loses a column, and which column
+        [
+            ("pre", "wind_speed"),
+            ("pre", "power"),
+            ("pre", "gen_rpm"),
+            ("pre", "pitch"),
+            ("post", "wind_speed"),
+            ("post", "power"),
+            ("post", "gen_rpm"),
+            ("post", "pitch"),
+        ],
+    )
+    def test_missing_required_column(  # a missing column on either side should give all-NaN results
+        self,
+        pre_df_or_post_df: str,
+        missing_column: str,
+        fake_power_curve_df: pd.DataFrame,
+        fake_gen_rpm_curve_df: pd.DataFrame,
+        fake_pitch_curve_df: pd.DataFrame,
+    ) -> None:
+        _df = pd.concat(  # one frame holding wind_speed, power, gen_rpm and pitch columns
+            [
+                fake_power_curve_df.reset_index().set_index("wind_speed"),
+                fake_gen_rpm_curve_df.reset_index().set_index("wind_speed"),
+                fake_pitch_curve_df.reset_index().set_index("wind_speed"),
+            ],
+            axis=1,  # join column-wise on the shared wind_speed index
+        ).reset_index()
+
+        pre_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "pre" else _df  # drop on one side only
+        post_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "post" else _df
+
+        actual = check_for_ops_curve_shift(
+            pre_df=pre_df,
+            post_df=post_df,
+            wtg_name="anything",
+            scada_ws_col="wind_speed",
+            pw_col="power",
+            rpm_col="gen_rpm",
+            pt_col="pitch",
+            cfg=Mock(),  # stand-ins for the config objects
+            plot_cfg=Mock(),
+            plot=False,
+        )
+
+        expected = {
+            f"{CurveTypes.POWER_CURVE.value}_shift": np.nan,
+            f"{CurveTypes.RPM.value}_shift": np.nan,
+            f"{CurveTypes.PITCH.value}_shift": np.nan,
+        }
+
+        assert actual == expected  # NOTE(review): NaN != NaN; equal here only if values are the np.nan object itself
+
+    def test_calls_funcs_as_intended(
+        self, fake_power_curve_df: pd.DataFrame, fake_gen_rpm_curve_df: pd.DataFrame, fake_pitch_curve_df: pd.DataFrame
+    ) -> None:
+        _df = pd.concat(  # one frame holding wind_speed, power, gen_rpm and pitch columns
+            [
+                fake_power_curve_df.reset_index().set_index("wind_speed"),
+                fake_gen_rpm_curve_df.reset_index().set_index("wind_speed"),
+                fake_pitch_curve_df.reset_index().set_index("wind_speed"),
+            ],
+            axis=1,  # join column-wise on the shared wind_speed index
+        ).reset_index()
+
+        wtg_name = "anything"
+
+        with (  # replace the three calculators and the plotting function with mocks
+            patch("wind_up.ops_curve_shift.calculate_power_curve_shift", return_value=np.nan) as mock_power,
+            patch("wind_up.ops_curve_shift.calculate_rpm_curve_shift", return_value=np.nan) as mock_rpm,
+            patch("wind_up.ops_curve_shift.calculate_pitch_curve_shift", return_value=np.nan) as mock_pitch,
+            patch("wind_up.ops_curve_shift.compare_ops_curves_pre_post", return_value=None) as mock_plot_func,
+        ):
+            mock_wind_up_conf = Mock()
+            mock_wind_up_conf.toggle = True  # so `toggle is not None` below evaluates to True
+            mock_plot_conf = Mock()
+
+            actual = check_for_ops_curve_shift(  # plot and sub_dir are omitted from this call
+                pre_df=_df,
+                post_df=_df,
+                wtg_name=wtg_name,
+                scada_ws_col="wind_speed",
+                pw_col="power",
+                rpm_col="gen_rpm",
+                pt_col="pitch",
+                cfg=mock_wind_up_conf,
+                plot_cfg=mock_plot_conf,
+            )
+
+        mock_power.assert_called_once_with(  # NOTE(review): presumably only matches if the same _df object is passed on
+            turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="power"
+        )
+        # unlike the other two curves, the rpm curve is expected to be called with x_col="power"
+        mock_rpm.assert_called_once_with(turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="power", y_col="gen_rpm")
+
+        mock_pitch.assert_called_once_with(
+            turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="pitch"
+        )
+
+        mock_plot_func.assert_called_once_with(  # the plot function is expected to be called exactly once
+            pre_df=_df,
+            post_df=_df,
+            wtg_name=wtg_name,
+            ws_col="wind_speed",
+            pw_col="power",
+            pt_col="pitch",
+            rpm_col="gen_rpm",
+            plot_cfg=mock_plot_conf,
+            is_toggle_test=mock_wind_up_conf.toggle is not None,
+            sub_dir=None,
+        )
+
+        expected = {
+            f"{CurveTypes.POWER_CURVE.value}_shift": np.nan,
+            f"{CurveTypes.RPM.value}_shift": np.nan,
+            f"{CurveTypes.PITCH.value}_shift": np.nan,
+        }
+
+        assert actual == expected  # NOTE(review): NaN != NaN; equal here only if values are the np.nan object itself
diff --git a/wind_up/main_analysis.py b/wind_up/main_analysis.py
@@ -24,9 +24,10 @@
 from wind_up.northing import (
     check_wtg_northing,
 )
+from wind_up.ops_curve_shift import check_for_ops_curve_shift
 from wind_up.plots.data_coverage_plots import plot_detrend_data_cov, plot_pre_post_data_cov
 from wind_up.plots.detrend_plots import plot_apply_wsratio_v_wd_scen
-from wind_up.plots.scada_funcs_plots import compare_ops_curves_pre_post, print_filter_stats
+from wind_up.plots.scada_funcs_plots import print_filter_stats
 from wind_up.plots.yaw_direction_plots import plot_yaw_direction_pre_post
 from wind_up.pp_analysis import pre_post_pp_analysis_with_reversal_and_bootstrapping
 from wind_up.result_manager import result_manager
@@ -365,78 +366,6 @@ def yaw_offset_results(
     return results
 
 
-def check_for_ops_curve_shift(
-    pre_df: pd.DataFrame,
-    post_df: pd.DataFrame,
-    *,
-    wtg_name: str,
-    scada_ws_col: str,
-    pw_col: str,
-    rpm_col: str,
-    pt_col: str,
-    cfg: WindUpConfig,
-    plot_cfg: PlotConfig,
-    sub_dir: str | None = None,
-) -> dict[str, float]:
-    results_dict = {
-        "powercurve_shift": np.nan,
-        "rpm_shift": np.nan,
-        "pitch_shift": np.nan,
-    }
-    # check if all required columns are present
-    required_cols = [scada_ws_col, pw_col, pt_col, rpm_col]
-    for req_col in required_cols:
-        if req_col not in pre_df.columns:
-            msg = f"check_for_ops_curve_shift {wtg_name} pre_df missing required column {req_col}"
-            result_manager.warning(msg)
-            return results_dict
-        if req_col not in post_df.columns:
-            msg = f"check_for_ops_curve_shift {wtg_name} post_df missing required column {req_col}"
-            result_manager.warning(msg)
-            return results_dict
-    pre_dropna_df = pre_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy()
-    post_dropna_df = post_df.dropna(subset=[scada_ws_col, pw_col, pt_col, rpm_col]).copy()
-
-    warning_msg: str | None = None
-    for descr, x_var, y_var, x_bin_width, warn_thresh in [
-        ("powercurve_shift", scada_ws_col, pw_col, 1, 0.01),
-        ("rpm_shift", pw_col, rpm_col, 0, 0.005),
-        ("pitch_shift", scada_ws_col, pt_col, 1, 0.1),
-    ]:
-        bins = np.arange(0, pre_dropna_df[x_var].max() + x_bin_width, x_bin_width) if x_bin_width > 0 else 10
-        mean_curve = pre_dropna_df.groupby(pd.cut(pre_dropna_df[x_var], bins=bins, retbins=False), observed=True).agg(
-            x_mean=pd.NamedAgg(column=x_var, aggfunc="mean"),
-            y_mean=pd.NamedAgg(column=y_var, aggfunc="mean"),
-        )
-        post_dropna_df["expected_y"] = np.interp(post_dropna_df[x_var], mean_curve["x_mean"], mean_curve["y_mean"])
-        mean_df = post_dropna_df.mean()
-        if y_var == pt_col:
-            results_dict[descr] = mean_df[y_var] - mean_df["expected_y"]
-        else:
-            results_dict[descr] = (mean_df[y_var] / mean_df["expected_y"] - 1).clip(-1, 1)
-        if abs(results_dict[descr]) > warn_thresh:
-            if warning_msg is None:
-                warning_msg = f"{wtg_name} check_for_ops_curve_shift warnings:"
-            warning_msg += f" abs({descr}) > {warn_thresh}: {abs(results_dict[descr]):.3f}"
-    if warning_msg is not None:
-        result_manager.warning(warning_msg)
-
-    compare_ops_curves_pre_post(
-        pre_df=pre_df,
-        post_df=post_df,
-        wtg_name=wtg_name,
-        ws_col=scada_ws_col,
-        pw_col=pw_col,
-        pt_col=pt_col,
-        rpm_col=rpm_col,
-        plot_cfg=plot_cfg,
-        is_toggle_test=(cfg.toggle is not None),
-        sub_dir=sub_dir,
-    )
-
-    return results_dict
-
-
 def calc_test_ref_results(
     *,
     test_df: pd.DataFrame,