Skip to content

Commit

Permalink
refactor: move ops curve shift calcs and reduce complexity of functions
Browse files Browse the repository at this point in the history
- Move operational curve shift calculations into their own module
- Refactor them into separate curve-specific functions
  (backed by more generic private functions)

This change improves readability and ease of adding further curve shift
calculations.
  • Loading branch information
samuelwnaylor committed Oct 8, 2024
1 parent b58aa58 commit 5121f08
Show file tree
Hide file tree
Showing 4 changed files with 518 additions and 74 deletions.
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,10 @@ filterwarnings = [
omit = [
"wind_up/plots/*.py",
]
exclude_lines = ["if __name__ == .__main__.:"]
exclude_lines = [
"if __name__ == .__main__.:",
"if TYPE_CHECKING:"
]

[tool.poe.tasks]
[tool.poe.tasks.lint]
Expand Down
294 changes: 294 additions & 0 deletions tests/test_ops_curve_shift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,294 @@
import logging
from unittest.mock import Mock, patch

import numpy as np
import pandas as pd
import pytest

from wind_up.ops_curve_shift import (
CurveConfig,
CurveShiftInput,
CurveThresholds,
CurveTypes,
calculate_pitch_curve_shift,
calculate_power_curve_shift,
calculate_rpm_curve_shift,
check_for_ops_curve_shift,
)


@pytest.fixture
def fake_power_curve_df() -> pd.DataFrame:
    """Synthetic power curve: a ``wind_speed`` column (0-14) indexed by the ``power`` values."""
    power = [0, 0, 0, 1, 3, 6, 10, 15, 22, 30, 36, 39, 40, 40, 40]
    # One wind speed per power value, counting up from zero.
    wind_speed = list(range(len(power)))
    return pd.DataFrame({"wind_speed": wind_speed}, index=pd.Index(power, name="power"))


@pytest.fixture
def fake_gen_rpm_curve_df() -> pd.DataFrame:
    """Synthetic generator-speed curve: a ``wind_speed`` column (0-14) indexed by ``gen_rpm``."""
    gen_rpm = [900, 900, 850, 875, 900, 1000, 1100, 1200, 1350, 1500, 1600, 1600, 1600, 1600, 1600]
    # One wind speed per rpm value, counting up from zero.
    wind_speed = list(range(len(gen_rpm)))
    return pd.DataFrame({"wind_speed": wind_speed}, index=pd.Index(gen_rpm, name="gen_rpm"))


@pytest.fixture
def fake_pitch_curve_df() -> pd.DataFrame:
    """Synthetic pitch curve: a ``wind_speed`` column (0-14) indexed by the ``pitch`` values."""
    pitch = [4, 4, 4, 3, 2, 1, 1, 1, 2, 5, 8, 11, 13, 14, 15]
    # One wind speed per pitch value, counting up from zero.
    wind_speed = list(range(len(pitch)))
    return pd.DataFrame({"wind_speed": wind_speed}, index=pd.Index(pitch, name="pitch"))


class TestCurveShiftInput:
    """Column validation performed when constructing ``CurveShiftInput``."""

    @staticmethod
    def _power_curve_config() -> CurveConfig:
        """Return the power-curve configuration shared by every test in this class."""
        return CurveConfig(
            name=CurveTypes.POWER_CURVE.value,
            x_col="wind_speed",
            y_col="power",
            x_bin_width=1,
            warning_threshold=0.01,
        )

    @staticmethod
    def test_acceptable_inputs(fake_power_curve_df: pd.DataFrame) -> None:
        """Construction succeeds when both frames contain the configured x and y columns."""
        CurveShiftInput(
            turbine_name="anything",
            pre_df=fake_power_curve_df.reset_index(),
            post_df=fake_power_curve_df.reset_index(),
            curve_config=TestCurveShiftInput._power_curve_config(),
        )

    @pytest.mark.parametrize("column_name", ["wind_speed", "power"])
    def test_missing_column_in_pre_df(self, column_name: str, fake_power_curve_df: pd.DataFrame) -> None:
        """Dropping either configured column from ``pre_df`` raises ``IndexError``."""
        # Build every argument up front so that only the CurveShiftInput call itself
        # can satisfy pytest.raises; an error while preparing inputs must fail the test.
        pre_df = fake_power_curve_df.reset_index().drop(columns=column_name)
        post_df = (fake_power_curve_df + 2).reset_index()
        curve_config = self._power_curve_config()

        with pytest.raises(IndexError, match="Column name missing in dataframe"):
            CurveShiftInput(
                turbine_name="anything",
                pre_df=pre_df,
                post_df=post_df,
                curve_config=curve_config,
            )

    @pytest.mark.parametrize("column_name", ["wind_speed", "power"])
    def test_missing_column_in_post_df(self, column_name: str, fake_power_curve_df: pd.DataFrame) -> None:
        """Dropping either configured column from ``post_df`` raises ``IndexError``."""
        # Same reasoning as for pre_df: keep the pytest.raises body down to the call under test.
        pre_df = fake_power_curve_df.reset_index()
        post_df = (fake_power_curve_df + 2).reset_index().drop(columns=column_name)
        curve_config = self._power_curve_config()

        with pytest.raises(IndexError, match="Column name missing in dataframe"):
            CurveShiftInput(
                turbine_name="anything",
                pre_df=pre_df,
                post_df=post_df,
                curve_config=curve_config,
            )


@pytest.mark.parametrize(
    ("shift_amount", "expected"),
    [
        pytest.param(2.0, -0.22099447513812154, id="shift DOES exceed threshold"),
        pytest.param(0.05, -0.007042253521126751, id="shift DOES NOT exceed threshold"),
    ],
)
def test_calculate_power_curve_shift(
    shift_amount: float, expected: float, fake_power_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
) -> None:
    """Check the power curve shift value and, when it exceeds the threshold, the logged warning."""
    # The fixture is indexed by "power", so the offset is applied to the wind speed column only.
    baseline_df = fake_power_curve_df.reset_index()
    shifted_df = (fake_power_curve_df + shift_amount).reset_index()

    with caplog.at_level(logging.WARNING):
        actual = calculate_power_curve_shift(
            turbine_name="anything",
            pre_df=baseline_df,
            post_df=shifted_df,
            x_col="wind_speed",
            y_col="power",
        )

    warning_expected = abs(expected) > CurveThresholds.POWER_CURVE.value
    if warning_expected:
        assert "Ops Curve Shift warning" in caplog.text

    np.testing.assert_almost_equal(actual=actual, desired=expected)


@pytest.mark.parametrize(
    ("shift_amount", "expected"),
    [
        pytest.param(0.2, -0.00712694877505593, id="shift DOES exceed threshold"),
        pytest.param(0.1, -0.0033534540576795058, id="shift DOES NOT exceed threshold"),
    ],
)
def test_calculate_rpm_curve_shift(
    shift_amount: float, expected: float, fake_gen_rpm_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
) -> None:
    """Check the rpm curve shift value and, when it exceeds the threshold, the logged warning."""
    # The fixture is indexed by "gen_rpm", so the offset is applied to the wind speed column only.
    baseline_df = fake_gen_rpm_curve_df.reset_index()
    shifted_df = (fake_gen_rpm_curve_df + shift_amount).reset_index()

    with caplog.at_level(logging.WARNING):
        actual = calculate_rpm_curve_shift(
            turbine_name="anything",
            pre_df=baseline_df,
            post_df=shifted_df,
            x_col="wind_speed",
            y_col="gen_rpm",
        )

    warning_expected = abs(expected) > CurveThresholds.RPM.value
    if warning_expected:
        assert "Ops Curve Shift warning" in caplog.text

    np.testing.assert_almost_equal(actual=actual, desired=expected)


@pytest.mark.parametrize(
    ("shift_amount", "expected"),
    [
        pytest.param(0.14, -0.1026666666666678, id="shift DOES exceed threshold"),
        pytest.param(0.13, -0.09533333333333438, id="shift DOES NOT exceed threshold"),
    ],
)
def test_calculate_pitch_curve_shift(
    shift_amount: float, expected: float, fake_pitch_curve_df: pd.DataFrame, caplog: pytest.LogCaptureFixture
) -> None:
    """Check the pitch curve shift value and, when it exceeds the threshold, the logged warning."""
    # The fixture is indexed by "pitch", so the offset is applied to the wind speed column only.
    baseline_df = fake_pitch_curve_df.reset_index()
    shifted_df = (fake_pitch_curve_df + shift_amount).reset_index()

    with caplog.at_level(logging.WARNING):
        actual = calculate_pitch_curve_shift(
            turbine_name="anything",
            pre_df=baseline_df,
            post_df=shifted_df,
            x_col="wind_speed",
            y_col="pitch",
        )

    warning_expected = abs(expected) > CurveThresholds.PITCH.value
    if warning_expected:
        assert "Ops Curve Shift warning" in caplog.text

    np.testing.assert_almost_equal(actual=actual, desired=expected)


class TestCheckForOpsCurveShift:
    """Tests for the ``check_for_ops_curve_shift`` entry point."""

    @staticmethod
    def _combined_curves_df(
        power_df: pd.DataFrame, rpm_df: pd.DataFrame, pitch_df: pd.DataFrame
    ) -> pd.DataFrame:
        """Join the three curve fixtures on wind speed into one frame.

        The result has ``wind_speed``, ``power``, ``gen_rpm`` and ``pitch`` columns.
        """
        per_curve = [df.reset_index().set_index("wind_speed") for df in (power_df, rpm_df, pitch_df)]
        return pd.concat(per_curve, axis=1).reset_index()

    @pytest.mark.parametrize(
        ("pre_df_or_post_df", "missing_column"),
        [
            ("pre", "wind_speed"),
            ("pre", "power"),
            ("pre", "gen_rpm"),
            ("pre", "pitch"),
            ("post", "wind_speed"),
            ("post", "power"),
            ("post", "gen_rpm"),
            ("post", "pitch"),
        ],
    )
    def test_missing_required_column(
        self,
        pre_df_or_post_df: str,
        missing_column: str,
        fake_power_curve_df: pd.DataFrame,
        fake_gen_rpm_curve_df: pd.DataFrame,
        fake_pitch_curve_df: pd.DataFrame,
    ) -> None:
        """Every shift is NaN when a required column is absent from either frame."""
        _df = self._combined_curves_df(fake_power_curve_df, fake_gen_rpm_curve_df, fake_pitch_curve_df)

        pre_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "pre" else _df
        post_df = _df.drop(columns=missing_column) if pre_df_or_post_df == "post" else _df

        actual = check_for_ops_curve_shift(
            pre_df=pre_df,
            post_df=post_df,
            wtg_name="anything",
            scada_ws_col="wind_speed",
            pw_col="power",
            rpm_col="gen_rpm",
            pt_col="pitch",
            cfg=Mock(),
            plot_cfg=Mock(),
            plot=False,
        )

        expected = {
            f"{CurveTypes.POWER_CURVE.value}_shift": np.nan,
            f"{CurveTypes.RPM.value}_shift": np.nan,
            f"{CurveTypes.PITCH.value}_shift": np.nan,
        }

        # NaN != NaN, so plain dict equality only passes when both sides hold the very
        # same NaN object. assert_equal compares dicts key by key and treats NaN as equal.
        np.testing.assert_equal(actual, expected)

    def test_calls_funcs_as_intended(
        self, fake_power_curve_df: pd.DataFrame, fake_gen_rpm_curve_df: pd.DataFrame, fake_pitch_curve_df: pd.DataFrame
    ) -> None:
        """The curve-specific calculators and the plotting function each get one call with the expected arguments."""
        _df = self._combined_curves_df(fake_power_curve_df, fake_gen_rpm_curve_df, fake_pitch_curve_df)

        wtg_name = "anything"

        with (
            patch("wind_up.ops_curve_shift.calculate_power_curve_shift", return_value=np.nan) as mock_power,
            patch("wind_up.ops_curve_shift.calculate_rpm_curve_shift", return_value=np.nan) as mock_rpm,
            patch("wind_up.ops_curve_shift.calculate_pitch_curve_shift", return_value=np.nan) as mock_pitch,
            patch("wind_up.ops_curve_shift.compare_ops_curves_pre_post", return_value=None) as mock_plot_func,
        ):
            mock_wind_up_conf = Mock()
            mock_wind_up_conf.toggle = True
            mock_plot_conf = Mock()

            actual = check_for_ops_curve_shift(
                pre_df=_df,
                post_df=_df,
                wtg_name=wtg_name,
                scada_ws_col="wind_speed",
                pw_col="power",
                rpm_col="gen_rpm",
                pt_col="pitch",
                cfg=mock_wind_up_conf,
                plot_cfg=mock_plot_conf,
            )

        mock_power.assert_called_once_with(
            turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="power"
        )

        # The rpm curve is expected to be evaluated against power rather than wind speed.
        mock_rpm.assert_called_once_with(turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="power", y_col="gen_rpm")

        mock_pitch.assert_called_once_with(
            turbine_name=wtg_name, pre_df=_df, post_df=_df, x_col="wind_speed", y_col="pitch"
        )

        mock_plot_func.assert_called_once_with(
            pre_df=_df,
            post_df=_df,
            wtg_name=wtg_name,
            ws_col="wind_speed",
            pw_col="power",
            pt_col="pitch",
            rpm_col="gen_rpm",
            plot_cfg=mock_plot_conf,
            is_toggle_test=mock_wind_up_conf.toggle is not None,
            sub_dir=None,
        )

        expected = {
            f"{CurveTypes.POWER_CURVE.value}_shift": np.nan,
            f"{CurveTypes.RPM.value}_shift": np.nan,
            f"{CurveTypes.PITCH.value}_shift": np.nan,
        }

        # NaN-aware comparison; see test_missing_required_column for the reasoning.
        np.testing.assert_equal(actual, expected)
75 changes: 2 additions & 73 deletions wind_up/main_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
from wind_up.northing import (
check_wtg_northing,
)
from wind_up.ops_curve_shift import check_for_ops_curve_shift
from wind_up.plots.data_coverage_plots import plot_detrend_data_cov, plot_pre_post_data_cov
from wind_up.plots.detrend_plots import plot_apply_wsratio_v_wd_scen
from wind_up.plots.scada_funcs_plots import compare_ops_curves_pre_post, print_filter_stats
from wind_up.plots.scada_funcs_plots import print_filter_stats
from wind_up.plots.yaw_direction_plots import plot_yaw_direction_pre_post
from wind_up.pp_analysis import pre_post_pp_analysis_with_reversal_and_bootstrapping
from wind_up.result_manager import result_manager
Expand Down Expand Up @@ -365,78 +366,6 @@ def yaw_offset_results(
return results


def check_for_ops_curve_shift(
    pre_df: pd.DataFrame,
    post_df: pd.DataFrame,
    *,
    wtg_name: str,
    scada_ws_col: str,
    pw_col: str,
    rpm_col: str,
    pt_col: str,
    cfg: WindUpConfig,
    plot_cfg: PlotConfig,
    sub_dir: str | None = None,
) -> dict[str, float]:
    """Quantify how far post-period operational curves deviate from pre-period mean curves.

    Three curves are checked: power vs wind speed, rpm vs power and pitch vs wind speed.
    For each one a binned mean curve is built from ``pre_df``, interpolated at the
    ``post_df`` x values, and the mean post-period y is compared with the mean expected y.
    Power and rpm shifts are relative (ratio minus one, clipped to [-1, 1]); the pitch
    shift is an absolute difference. A single warning listing every shift whose magnitude
    exceeds its threshold is sent to ``result_manager``, then the pre/post comparison
    plot is produced.

    Args:
        pre_df: pre-period data.
        post_df: post-period data.
        wtg_name: turbine name used in warnings and passed to the plotting function.
        scada_ws_col: wind speed column name.
        pw_col: power column name.
        rpm_col: rpm column name.
        pt_col: pitch column name.
        cfg: configuration; only ``cfg.toggle`` is read, to flag toggle tests for plotting.
        plot_cfg: plotting configuration passed through to the plotting function.
        sub_dir: optional sub directory passed through to the plotting function.

    Returns:
        Dict with keys ``powercurve_shift``, ``rpm_shift`` and ``pitch_shift``. All values
        are NaN (and no plot is made) if a required column is missing from either frame or
        if ``pre_df`` has no row with all required columns populated.
    """
    results_dict = {
        "powercurve_shift": np.nan,
        "rpm_shift": np.nan,
        "pitch_shift": np.nan,
    }

    # Bail out with NaN results as soon as any required column is absent.
    required_cols = [scada_ws_col, pw_col, pt_col, rpm_col]
    for req_col in required_cols:
        for df_name, df in (("pre_df", pre_df), ("post_df", post_df)):
            if req_col not in df.columns:
                msg = f"check_for_ops_curve_shift {wtg_name} {df_name} missing required column {req_col}"
                result_manager.warning(msg)
                return results_dict

    pre_dropna_df = pre_df.dropna(subset=required_cols)
    if pre_dropna_df.empty:
        # Without pre-period rows there is no reference curve, and the bin edges below
        # would be built from a NaN maximum, which makes np.arange raise.
        msg = f"check_for_ops_curve_shift {wtg_name} pre_df has no rows with all required columns populated"
        result_manager.warning(msg)
        return results_dict
    # Copy because a scratch "expected_y" column is written to this frame below.
    post_dropna_df = post_df.dropna(subset=required_cols).copy()

    exceeded_msgs: list[str] = []
    for descr, x_var, y_var, x_bin_width, warn_thresh in [
        ("powercurve_shift", scada_ws_col, pw_col, 1, 0.01),
        ("rpm_shift", pw_col, rpm_col, 0, 0.005),
        ("pitch_shift", scada_ws_col, pt_col, 1, 0.1),
    ]:
        # A bin width of 0 means "let pd.cut split the x range into 10 bins".
        bins = np.arange(0, pre_dropna_df[x_var].max() + x_bin_width, x_bin_width) if x_bin_width > 0 else 10
        mean_curve = pre_dropna_df.groupby(pd.cut(pre_dropna_df[x_var], bins=bins, retbins=False), observed=True).agg(
            x_mean=pd.NamedAgg(column=x_var, aggfunc="mean"),
            y_mean=pd.NamedAgg(column=y_var, aggfunc="mean"),
        )
        post_dropna_df["expected_y"] = np.interp(post_dropna_df[x_var], mean_curve["x_mean"], mean_curve["y_mean"])
        # Average only the two columns that are used, so unrelated non-numeric
        # columns in post_df cannot make the mean fail.
        mean_df = post_dropna_df[[y_var, "expected_y"]].mean()
        if y_var == pt_col:
            results_dict[descr] = mean_df[y_var] - mean_df["expected_y"]
        else:
            results_dict[descr] = (mean_df[y_var] / mean_df["expected_y"] - 1).clip(-1, 1)
        if abs(results_dict[descr]) > warn_thresh:
            exceeded_msgs.append(f" abs({descr}) > {warn_thresh}: {abs(results_dict[descr]):.3f}")

    if exceeded_msgs:
        result_manager.warning(f"{wtg_name} check_for_ops_curve_shift warnings:" + "".join(exceeded_msgs))

    compare_ops_curves_pre_post(
        pre_df=pre_df,
        post_df=post_df,
        wtg_name=wtg_name,
        ws_col=scada_ws_col,
        pw_col=pw_col,
        pt_col=pt_col,
        rpm_col=rpm_col,
        plot_cfg=plot_cfg,
        is_toggle_test=(cfg.toggle is not None),
        sub_dir=sub_dir,
    )

    return results_dict


def calc_test_ref_results(
*,
test_df: pd.DataFrame,
Expand Down
Loading

0 comments on commit 5121f08

Please sign in to comment.