From a2d75db7a7275498a64752582fa1facc79d2930f Mon Sep 17 00:00:00 2001 From: Samuel Naylor Date: Thu, 12 Sep 2024 15:06:09 +0100 Subject: [PATCH] refactor: improve readability and support both WeDoWind datasets - Support both Pitch Angle Pair and Vortex Generator Pair WeDoWind datasets - Support not clipping rated power - Improve readability and replace some hard-coded column name strings --- examples/helpers.py | 4 +- examples/wdw_example.ipynb | 1287 ---------------------------- examples/wedowind_example.py | 310 ++++--- wind_up/models.py | 1 + wind_up/plots/scada_funcs_plots.py | 68 +- wind_up/pp_analysis.py | 14 +- 6 files changed, 259 insertions(+), 1425 deletions(-) delete mode 100644 examples/wdw_example.ipynb diff --git a/examples/helpers.py b/examples/helpers.py index ca49d38..cd14666 100644 --- a/examples/helpers.py +++ b/examples/helpers.py @@ -15,7 +15,7 @@ def setup_logger(log_fpath: Path, level: int = logging.INFO) -> None: - log_formatter_file = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s") + log_formatter_file = logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s") root_logger = logging.getLogger() root_logger.setLevel(level) @@ -23,7 +23,7 @@ def setup_logger(log_fpath: Path, level: int = logging.INFO) -> None: file_handler.setFormatter(log_formatter_file) root_logger.addHandler(file_handler) - log_formatter_console = logging.Formatter("%(message)s") + log_formatter_console = logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s") console_handler = logging.StreamHandler() console_handler.setFormatter(log_formatter_console) root_logger.addHandler(console_handler) diff --git a/examples/wdw_example.ipynb b/examples/wdw_example.ipynb deleted file mode 100644 index 3599bdc..0000000 --- a/examples/wdw_example.ipynb +++ /dev/null @@ -1,1287 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "a16a48dc-14f9-4ed3-98c2-6b7198cdecdf", - "metadata": {}, - "outputs": [], - "source": [ - "# ruff: noqa: F405" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "8de26d60-249e-46e9-ae5e-d4f6ed38dc2f", - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ea667b9b-19e7-455b-909a-1c3bc6e5a2f6", - "metadata": {}, - "outputs": [], - "source": [ - "from wedowind_example import * # noqa: F403" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "c99e2a22-d632-4f66-8fa8-1294614a657b", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Downloading example data from Zenodo\n", - "File C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\cache\\wedowind_example_data\\Turbine_Upgrade_Dataset.zip already exists. Skipping download.\n", - "File C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\cache\\wedowind_example_data\\Inland_Offshore_Wind_Farm_Dataset1.zip already exists. Skipping download.\n" - ] - } - ], - "source": [ - "download_wdw_data_from_zenodo()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "61992e29-545d-4d81-b5cc-9279517ec9cd", - "metadata": {}, - "outputs": [], - "source": [ - "assumed_rated_power_kw = 1500\n", - "rotor_diameter_m = 80\n", - "cutout_ws_mps = 20\n", - "scada_file_name = \"Turbine Upgrade Dataset(Pitch Angle Pair).csv\" # or Turbine Upgrade Dataset(VG Pair).csv" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "7009cf71-3418-435a-a983-ee194453b465", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Preprocessing turbine SCADA data\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
upgrade statusVDrhoSInormalized_powerTurbineNameActivePowerMeanWindSpeedMeanYawAngleMeanPitchAngleMeanGenRpmMeanShutdownDuration
TimeStamp_StartFormat
2010-07-30 22:40:00+00:0007.96138.91.1402240.2665120.0904520.393152Test589.7272737.96138.9010000
2010-07-30 22:50:00+00:0008.19140.61.1405220.2861670.0830280.457455Test686.1818178.19140.6010000
2010-07-30 23:00:00+00:0007.20139.31.1407710.3393210.0986110.382121Test573.1818187.20139.3010000
2010-07-30 23:10:00+00:0006.81137.41.1411860.3758150.1013220.282182Test423.2727276.81137.4010000
2010-07-30 23:20:00+00:0005.09137.51.1414640.3034720.1650290.127212Test190.8181825.09137.5010000
\n", - "
" - ], - "text/plain": [ - " upgrade status V D rho S \\\n", - "TimeStamp_StartFormat \n", - "2010-07-30 22:40:00+00:00 0 7.96 138.9 1.140224 0.266512 \n", - "2010-07-30 22:50:00+00:00 0 8.19 140.6 1.140522 0.286167 \n", - "2010-07-30 23:00:00+00:00 0 7.20 139.3 1.140771 0.339321 \n", - "2010-07-30 23:10:00+00:00 0 6.81 137.4 1.141186 0.375815 \n", - "2010-07-30 23:20:00+00:00 0 5.09 137.5 1.141464 0.303472 \n", - "\n", - " I normalized_power TurbineName \\\n", - "TimeStamp_StartFormat \n", - "2010-07-30 22:40:00+00:00 0.090452 0.393152 Test \n", - "2010-07-30 22:50:00+00:00 0.083028 0.457455 Test \n", - "2010-07-30 23:00:00+00:00 0.098611 0.382121 Test \n", - "2010-07-30 23:10:00+00:00 0.101322 0.282182 Test \n", - "2010-07-30 23:20:00+00:00 0.165029 0.127212 Test \n", - "\n", - " ActivePowerMean WindSpeedMean YawAngleMean \\\n", - "TimeStamp_StartFormat \n", - "2010-07-30 22:40:00+00:00 589.727273 7.96 138.9 \n", - "2010-07-30 22:50:00+00:00 686.181817 8.19 140.6 \n", - "2010-07-30 23:00:00+00:00 573.181818 7.20 139.3 \n", - "2010-07-30 23:10:00+00:00 423.272727 6.81 137.4 \n", - "2010-07-30 23:20:00+00:00 190.818182 5.09 137.5 \n", - "\n", - " PitchAngleMean GenRpmMean ShutdownDuration \n", - "TimeStamp_StartFormat \n", - "2010-07-30 22:40:00+00:00 0 1000 0 \n", - "2010-07-30 22:50:00+00:00 0 1000 0 \n", - "2010-07-30 23:00:00+00:00 0 1000 0 \n", - "2010-07-30 23:10:00+00:00 0 1000 0 \n", - "2010-07-30 23:20:00+00:00 0 1000 0 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "logger.info(\"Preprocessing turbine SCADA data\")\n", - "scada_df = WDWScadaUnpacker(scada_file_name=scada_file_name).unpack(rated_power_kw=assumed_rated_power_kw)\n", - "scada_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "c0922049-6159-41d8-ba4b-94affe82b296", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NameLatitudeLongitudeTimeZoneTimeSpanMinutesTimeFormat
0WT140.036394-89.052141UTC10Start
1WT240.039089-89.032205UTC10Start
2WT339.954324-88.942660UTC10Start
3WT439.972739-88.969221UTC10Start
4MAST140.042682-89.058004UTC10Start
\n", - "
" - ], - "text/plain": [ - " Name Latitude Longitude TimeZone TimeSpanMinutes TimeFormat\n", - "0 WT1 40.036394 -89.052141 UTC 10 Start\n", - "1 WT2 40.039089 -89.032205 UTC 10 Start\n", - "2 WT3 39.954324 -88.942660 UTC 10 Start\n", - "3 WT4 39.972739 -88.969221 UTC 10 Start\n", - "4 MAST1 40.042682 -89.058004 UTC 10 Start" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metadata_df = make_wdw_metadata_df()\n", - "metadata_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "3734d8fd-e4c8-4e5e-820d-a95589998c77", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Custom plots saved to directory: C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\output\\wedowind_example\\custom_plots\n", - "Custom plots saved to directory: C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\output\\wedowind_example\\custom_plots\n" - ] - } - ], - "source": [ - "run_custom_plots(scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m)" - ] - }, - { - "cell_type": "markdown", - "id": "774b80d0-476f-4a86-a774-b6aca2462181", - "metadata": {}, - "source": [ - "Amend dataframes based on reviewing the Custom Plots." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "3685f588-060d-4209-aadc-5f87f606887e", - "metadata": {}, - "outputs": [], - "source": [ - "# based on the above I think the objects are MAST1, test=WT1 and ref=WT2\n", - "scada_df = scada_df.replace(\n", - " {\"TurbineName\": {TurbineNames.TEST.value: \"WT1\", TurbineNames.REF.value: \"WT2\", \"Mast\": \"MAST1\"}}\n", - ")\n", - "# drop everything except the turbines from the metadata\n", - "metadata_df = metadata_df[metadata_df[\"Name\"].isin([\"WT1\", \"WT2\"])]" - ] - }, - { - "cell_type": "markdown", - "id": "a346cc79-1012-4d24-b340-ae4115f34fe4", - "metadata": {}, - "source": [ - "### Construct Reanalysis" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "47ed234d-5c55-4165-902f-f808408f82a1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
100_m_hws_mean_mps100_m_hwd_mean_deg-n_true
2010-07-30 22:40:00+00:008.184808172.795653
2010-07-30 23:40:00+00:006.34893483.654251
2010-07-31 00:40:00+00:005.204868288.677008
2010-07-31 01:40:00+00:005.082638332.470858
2010-07-31 02:40:00+00:009.06635195.806898
\n", - "
" - ], - "text/plain": [ - " 100_m_hws_mean_mps 100_m_hwd_mean_deg-n_true\n", - "2010-07-30 22:40:00+00:00 8.184808 172.795653\n", - "2010-07-30 23:40:00+00:00 6.348934 83.654251\n", - "2010-07-31 00:40:00+00:00 5.204868 288.677008\n", - "2010-07-31 01:40:00+00:00 5.082638 332.470858\n", - "2010-07-31 02:40:00+00:00 9.066351 95.806898" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from wind_up.reanalysis_data import ReanalysisDataset\n", - "\n", - "rng = np.random.default_rng(0)\n", - "rows = 100\n", - "reanalysis_dataset = ReanalysisDataset(\n", - " id=\"dummy_reanalysis_data\",\n", - " data=pd.DataFrame(\n", - " data={\n", - " \"100_m_hws_mean_mps\": rng.uniform(5, 10, rows),\n", - " \"100_m_hwd_mean_deg-n_true\": rng.uniform(0, 360, rows),\n", - " },\n", - " index=pd.DatetimeIndex(pd.date_range(start=scada_df.index.min(), periods=rows, freq=\"h\", tz=\"UTC\")),\n", - " ),\n", - ")\n", - "\n", - "reanalysis_dataset.data.head()" - ] - }, - { - "cell_type": "markdown", - "id": "d989a0a1-4b3f-4e9d-b3f0-fa4061111616", - "metadata": {}, - "source": [ - "# Construct `wind-up` Configuration" - ] - }, - { - "cell_type": "markdown", - "id": "6aa9d215-e58b-409b-a1ab-bb5eb1739ebf", - "metadata": {}, - "source": [ - "## Wind Farm Config" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "63e01414-8585-4cf8-9169-c3229eaa1f2e", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "loaded WindUpConfig assessment_name: wedowind_example\n", - "loaded WindUpConfig assessment_name: wedowind_example\n", - "loaded WindUpConfig assessment_name: wedowind_example\n", - "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n", - "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n", - "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n", - "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n", - "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n", - "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n", - "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n", - "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n", - "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n", - "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n", - "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n", - "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n" - ] - }, - { - "data": { - "text/plain": [ - "WindUpConfig(assessment_name='wedowind_example', timebase_s=600, ignore_turbine_anemometer_data=False, require_test_wake_free=False, require_ref_wake_free=False, detrend_min_hours=24, ref_wd_filter=[150.0, 240.0], ref_hod_filter=None, filter_all_test_wtgs_together=False, use_lt_distribution=False, use_test_wtg_lt_distribution=True, out_dir=WindowsPath('C:/Users/snaylor/Documents/GitHub/wind-up/output/wedowind_example'), test_wtgs=[Turbine(name='WT1', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], ref_wtgs=[Turbine(name='WT2', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], ref_super_wtgs=[], non_wtg_ref_names=[], upgrade_first_dt_utc_start=Timestamp('2011-04-25 21:50:00+0000', tz='UTC'), analysis_last_dt_utc_start=Timestamp('2011-06-25 18:30:00+0000', tz='UTC'), analysis_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), lt_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), lt_last_dt_utc_start=Timestamp('2010-09-29 19:10:00+0000', tz='UTC'), detrend_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), detrend_last_dt_utc_start=Timestamp('2011-04-18 21:40:00+0000', tz='UTC'), years_offset_for_pre_period=1, years_for_lt_distribution=1, years_for_detrend=1, ws_bin_width=1.0, bootstrap_runs_override=None, reanalysis_method='node_with_best_ws_corr', missing_scada_data_fields=['YawAngleMin', 'YawAngleMax'], asset=Asset(name='Mystery Wind Farm', wtgs=[Turbine(name='WT1', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan), Turbine(name='WT2', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], masts_and_lidars=[]), exclusion_periods_utc=[], yaw_data_exclusions_utc=[], optimize_northing_corrections=False, northing_corrections_utc=[], toggle=None, prepost=PrePost(pre_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), pre_last_dt_utc_start=Timestamp('2010-09-29 19:10:00+0000', tz='UTC'), post_first_dt_utc_start=Timestamp('2011-04-25 21:50:00+0000', tz='UTC'), post_last_dt_utc_start=Timestamp('2011-06-25 18:30:00+0000', tz='UTC')))" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "wtg_map = {\n", - " x: {\n", - " \"name\": x,\n", - " \"turbine_type\": {\n", - " \"turbine_type\": \"unknown turbine type\",\n", - " \"rotor_diameter_m\": rotor_diameter_m,\n", - " \"rated_power_kw\": assumed_rated_power_kw,\n", - " \"cutout_ws_mps\": cutout_ws_mps,\n", - " \"normal_operation_pitch_range\": (-10.0, 35.0),\n", - " \"normal_operation_genrpm_range\": (0, 2000.0),\n", - " },\n", - " }\n", - " for x in [\"WT1\", \"WT2\"]\n", - "}\n", - "\n", - "cfg = WindUpConfig(\n", - " assessment_name=ASSESSMENT_NAME,\n", - " ref_wd_filter=[150, 240], # apparent wake free sector\n", - " use_lt_distribution=False,\n", - " out_dir=OUTPUT_DIR / ASSESSMENT_NAME,\n", - " test_wtgs=[wtg_map[x] for x in [\"WT1\"]],\n", - " ref_wtgs=[wtg_map[x] for x in [\"WT2\"]],\n", - " analysis_first_dt_utc_start=scada_df.index.min(),\n", - " upgrade_first_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.min(),\n", - " analysis_last_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.max(),\n", - " years_offset_for_pre_period=1,\n", - " lt_first_dt_utc_start=scada_df.index.min(),\n", - " lt_last_dt_utc_start=scada_df.index.min()\n", - " + (scada_df[scada_df[\"upgrade status\"] > 0].index.max() - scada_df[scada_df[\"upgrade status\"] > 0].index.min())\n", - " - pd.Timedelta(minutes=10),\n", - " detrend_first_dt_utc_start=scada_df.index.min(),\n", - " detrend_last_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.min()\n", - " - pd.DateOffset(weeks=1)\n", - " - pd.Timedelta(minutes=10),\n", - " years_for_lt_distribution=1,\n", - " years_for_detrend=1,\n", - " ws_bin_width=1.0,\n", - " asset={\n", - " \"name\": \"Mystery Wind Farm\",\n", - " \"wtgs\": list(wtg_map.values()),\n", - " },\n", - " missing_scada_data_fields=[\"YawAngleMin\", \"YawAngleMax\"],\n", - " prepost={\n", - " \"pre_first_dt_utc_start\": scada_df.index.min(),\n", - " \"pre_last_dt_utc_start\": scada_df.index.min()\n", - " + (scada_df[scada_df[\"upgrade status\"] > 0].index.max() - scada_df[scada_df[\"upgrade status\"] > 0].index.min())\n", - " - pd.Timedelta(minutes=10),\n", - " \"post_first_dt_utc_start\": scada_df[scada_df[\"upgrade status\"] > 0].index.min(),\n", - " \"post_last_dt_utc_start\": scada_df[scada_df[\"upgrade status\"] > 0].index.max(),\n", - " },\n", - " optimize_northing_corrections=False,\n", - ")\n", - "\n", - "cfg" - ] - }, - { - "cell_type": "markdown", - "id": "95dd5c37-0945-4384-9019-34b0dab1ce0b", - "metadata": {}, - "source": [ - "## Plot Configuration" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "9785a31a-ed13-4249-beec-36e91d5458f6", - "metadata": {}, - "outputs": [], - "source": [ - "plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / \"plots\")" - ] - }, - { - "cell_type": "markdown", - "id": "a4944d52-3e0d-4159-a6a7-7bdf4d6b5838", - "metadata": {}, - "source": [ - "## Assessment Configs" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "092491ca-55be-480e-9494-0cb919c5dae6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "running wind_up analysis for wedowind_example\n", - "running wind_up analysis for wedowind_example\n", - "running wind_up analysis for wedowind_example\n", - "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n", - "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n", - "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n", - "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\wind_up\\smart_data.py:99: SettingWithCopyWarning: \n", - "A value is trying to be set on a copy of a slice from a DataFrame.\n", - "Try using .loc[row_indexer,col_indexer] = value instead\n", - "\n", - "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", - " scada_raw[\"TurbineName\"] = scada_raw[\"TurbineName\"].astype(\"category\")\n", - "loaded 2 turbines, 0.5 years per turbine\n", - "loaded 2 turbines, 0.5 years per turbine\n", - "loaded 2 turbines, 0.5 years per turbine\n", - "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n", - "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n", - "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n", - "average capacity factor: 52.9%\n", - "average capacity factor: 52.9%\n", - "average capacity factor: 52.9%\n", - "top 3 capacity factor [%]:\n", - "+-----+------+\n", - "| WT2 | 53.3 |\n", - "| WT1 | 52.4 |\n", - "+-----+------+\n", - "top 3 capacity factor [%]:\n", - "+-----+------+\n", - "| WT2 | 53.3 |\n", - "| WT1 | 52.4 |\n", - "+-----+------+\n", - "top 3 capacity factor [%]:\n", - "+-----+------+\n", - "| WT2 | 53.3 |\n", - "| WT1 | 52.4 |\n", - "+-----+------+\n", - "bottom 3 capacity factor [%]:\n", - "+-----+------+\n", - "| WT1 | 52.4 |\n", - "| WT2 | 53.3 |\n", - "+-----+------+\n", - "bottom 3 capacity factor [%]:\n", - "+-----+------+\n", - "| WT1 | 52.4 |\n", - "| WT2 | 53.3 |\n", - "+-----+------+\n", - "bottom 3 capacity factor [%]:\n", - "+-----+------+\n", - "| WT1 | 52.4 |\n", - "| WT2 | 53.3 |\n", - "+-----+------+\n", - "0 rows [0.0%] of power data is missing before filtering\n", - "0 rows [0.0%] of power data is missing before filtering\n", - "0 rows [0.0%] of power data is missing before filtering\n", - "filter_stuck_data set 0 rows [0.0%] to NA\n", - "filter_stuck_data set 0 rows [0.0%] to NA\n", - "filter_stuck_data set 0 rows [0.0%] to NA\n", - "filter_bad_pw_ws set 0 rows [0.0%] to NA\n", - "filter_bad_pw_ws set 0 rows [0.0%] to NA\n", - "filter_bad_pw_ws set 0 rows [0.0%] to NA\n", - "filter_exclusions set 0 rows [0.0%] to NA\n", - "filter_exclusions set 0 rows [0.0%] to NA\n", - "filter_exclusions set 0 rows [0.0%] to NA\n", - "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n", - "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n", - "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n", - "filter_downtime set 0 rows [0.0%] to NA\n", - "filter_downtime set 0 rows [0.0%] to NA\n", - "filter_downtime set 0 rows [0.0%] to NA\n", - "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n", - "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n", - "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n", - "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n", - "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n", - "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n", - "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n", - "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n", - "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n", - "0 rows [0.0%] of power data is missing after filtering\n", - "0 rows [0.0%] of power data is missing after filtering\n", - "0 rows [0.0%] of power data is missing after filtering\n", - "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n", - "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n", - "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n", - "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n", - "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n", - "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n", - "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n", - "+-----+-----+\n", - "| WT1 | nan |\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n", - "+-----+-----+\n", - "| WT1 | nan |\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n", - "+-----+-----+\n", - "| WT1 | nan |\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n", - "+-----+-----+\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n", - "+-----+-----+\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n", - "+-----+-----+\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "no northing corrections to apply\n", - "no northing corrections to apply\n", - "no northing corrections to apply\n", - "applied 0 northing corrections\n", - "applied 0 northing corrections\n", - "applied 0 northing corrections\n", - "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n", - "+-----+-----+\n", - "| WT1 | nan |\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n", - "+-----+-----+\n", - "| WT1 | nan |\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n", - "+-----+-----+\n", - "| WT1 | nan |\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n", - "+-----+-----+\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n", - "+-----+-----+\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n", - "+-----+-----+\n", - "| WT2 | nan |\n", - "+-----+-----+\n", - "estimated rated wind speed = 14.2 m/s\n", - "estimated rated wind speed = 14.2 m/s\n", - "estimated rated wind speed = 14.2 m/s\n", - "estimated cut-in wind speed = 3.5 m/s\n", - "estimated cut-in wind speed = 3.5 m/s\n", - "estimated cut-in wind speed = 3.5 m/s\n", - "##############################################################################\n", - "# estimate wind speed from power\n", - "##############################################################################\n", - "##############################################################################\n", - "# estimate wind speed from power\n", - "##############################################################################\n", - "##############################################################################\n", - "# estimate wind speed from power\n", - "##############################################################################\n", - "WT1 cp correction factor = 0.99\n", - "WT1 cp correction factor = 0.99\n", - "WT1 cp correction factor = 0.99\n", - "WT2 cp correction factor = 1.01\n", - "WT2 cp correction factor = 1.01\n", - "WT2 cp correction factor = 1.01\n", - "unknown turbine type 100.0% of rows are waking\n", - "unknown turbine type 100.0% of rows are waking\n", - "unknown turbine type 100.0% of rows are waking\n", - "unknown turbine type 0.0% of rows are not waking\n", - "unknown turbine type 0.0% of rows are not waking\n", - "unknown turbine type 0.0% of rows are not waking\n", - "unknown turbine type 0.0% of rows have unknown or partial waking\n", - "unknown turbine type 0.0% of rows have unknown or partial waking\n", - "unknown turbine type 0.0% of rows have unknown or partial waking\n" - ] - } - ], - "source": [ - "assessment_inputs = AssessmentInputs.from_cfg(\n", - " cfg=cfg,\n", - " plot_cfg=plot_cfg,\n", - " scada_df=scada_df[(scada_df[\"D\"] < 70) | (scada_df[\"D\"] > 150)], # noqa PLR2004 filter out apparent mast waked sector\n", - " metadata_df=metadata_df,\n", - " reanalysis_datasets=[reanalysis_dataset],\n", - " cache_dir=CACHE_DIR,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "5f80aa62-d7cd-4ec5-a28c-3e4ffe2e006d", - "metadata": {}, - "source": [ - "# Run Analysis" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "d12503f2-49b4-473f-b98f-5cb76b636aba", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "test turbines: ['WT1']\n", - "test turbines: ['WT1']\n", - "test turbines: ['WT1']\n", - "test turbines: ['WT1']\n", - "ref list: ['WT2']\n", - "ref list: ['WT2']\n", - "ref list: ['WT2']\n", - "ref list: ['WT2']\n", - "turbines to test: ['WT1']\n", - "turbines to test: ['WT1']\n", - "turbines to test: ['WT1']\n", - "turbines to test: ['WT1']\n", - "could not calculate rolling windspeed diff\n", - "could not calculate rolling windspeed diff\n", - "could not calculate rolling windspeed diff\n", - "could not calculate rolling windspeed diff\n", - "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n", - " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", - "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n", - " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", - "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n", - "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n", - "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n", - "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n", - "analysing WT1 WT2, loop_counter=0\n", - "analysing WT1 WT2, loop_counter=0\n", - "analysing WT1 WT2, loop_counter=0\n", - "analysing WT1 WT2, loop_counter=0\n", - "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n", - "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n", - "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n", - "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n", - "could not calculate rolling windspeed diff\n", - "could not calculate rolling windspeed diff\n", - "could not calculate rolling windspeed diff\n", - "could not calculate rolling windspeed diff\n", - "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n", - " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", - "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n", - " return np.nanmean(a, axis, out=out, keepdims=keepdims)\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n", - "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "top 1 WT1 WT2 waking scenarios [%]:\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "+--------------+-------+\n", - "| none offline | 100.0 |\n", - "+--------------+-------+\n", - "detrend applied to 91 scenario - directions\n", - "detrend applied to 91 scenario - directions\n", - "detrend applied to 91 scenario - directions\n", - "detrend applied to 91 scenario - directions\n", - "detrend applied to 91 scenario - directions\n", - "detrend applied to 91 scenario - directions\n", - "detrend applied to 91 scenario - directions\n", - "detrend applied to 91 scenario - directions\n", - "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n", - "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n", - "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n", - "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n", - "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n", - "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n", - "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n", - "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "removed 0 bad detrend results\n", - "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n", - "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n", - "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n", - "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n", - "\n", - "results for test=WT1 ref=WT2:\n", - "\n", - "\n", - "results for test=WT1 ref=WT2:\n", - "\n", - "\n", - "results for test=WT1 ref=WT2:\n", - "\n", - "\n", - "results for test=WT1 ref=WT2:\n", - "\n", - "hours pre = 459.3\n", - "hours pre = 459.3\n", - "hours pre = 459.3\n", - "hours pre = 459.3\n", - "hours post = 522.2\n", - "hours post = 522.2\n", - "hours post = 522.2\n", - "hours post = 522.2\n", - "\n", - "uplift estimate before adjustments = 2.6 %\n", - "\n", - "uplift estimate before adjustments = 2.6 %\n", - "\n", - "uplift estimate before adjustments = 2.6 %\n", - "\n", - "uplift estimate before adjustments = 2.6 %\n", - "\n", - "power only uplift estimate = 2.3 %\n", - "\n", - "power only uplift estimate = 2.3 %\n", - "\n", - "power only uplift estimate = 2.3 %\n", - "\n", - "power only uplift estimate = 2.3 %\n", - "reversed (power only) uplift estimate = 2.6 %\n", - "\n", - "reversed (power only) uplift estimate = 2.6 %\n", - "\n", - "reversed (power only) uplift estimate = 2.6 %\n", - "\n", - "reversed (power only) uplift estimate = 2.6 %\n", - "\n", - "Running block bootstrapping uncertainty analysis n_samples = 400\n", - "Running block bootstrapping uncertainty analysis n_samples = 400\n", - "Running block bootstrapping uncertainty analysis n_samples = 400\n", - "Running block bootstrapping uncertainty analysis n_samples = 400\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "8b9f3c80d5e94e4e8c49d9c67f12325d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/400 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
wind_up_versiontime_calculatedpreprocess_warning_countstest_warning_countstest_ref_warning_countstest_wtgtest_pw_colrefref_ws_coluplift_frc...poweronly_uplift_frcreversed_uplift_frcreversal_errorlt_wtg_hours_rawlt_wtg_hours_filttest_max_ws_drifttest_max_ws_drift_pp_periodtest_powercurve_shifttest_rpm_shifttest_pitch_shift
00.1.92024-09-10 14:56:31.754381+00:00022WT1test_pw_clippedWT2ref_ws_est_blend0.027311...0.0234370.025710.00227400NaNNaN0.0716210.00.0
\n", - "

1 rows × 49 columns

\n", - "" - ], - "text/plain": [ - " wind_up_version time_calculated preprocess_warning_counts \\\n", - "0 0.1.9 2024-09-10 14:56:31.754381+00:00 0 \n", - "\n", - " test_warning_counts test_ref_warning_counts test_wtg test_pw_col \\\n", - "0 2 2 WT1 test_pw_clipped \n", - "\n", - " ref ref_ws_col uplift_frc ... poweronly_uplift_frc \\\n", - "0 WT2 ref_ws_est_blend 0.027311 ... 0.023437 \n", - "\n", - " reversed_uplift_frc reversal_error lt_wtg_hours_raw lt_wtg_hours_filt \\\n", - "0 0.02571 0.002274 0 0 \n", - "\n", - " test_max_ws_drift test_max_ws_drift_pp_period test_powercurve_shift \\\n", - "0 NaN NaN 0.071621 \n", - "\n", - " test_rpm_shift test_pitch_shift \n", - "0 0.0 0.0 \n", - "\n", - "[1 rows x 49 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results_per_test_ref_df" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/wedowind_example.py b/examples/wedowind_example.py index 16f69f5..ba4ea16 100644 --- a/examples/wedowind_example.py +++ b/examples/wedowind_example.py @@ -1,15 +1,18 @@ # example based on https://relight.cloud/doc/turbine-upgrade-dataset-9zw1vl/turbineperformance +import datetime as dt import logging import math import sys import zipfile from enum import Enum from pathlib import Path +from typing import NamedTuple import numpy as np import pandas as pd from matplotlib import pyplot as plt +from pydantic import BaseModel, Field from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns from wind_up.interface import AssessmentInputs @@ -21,7 +24,7 @@ sys.path.append(str(PROJECTROOT_DIR)) from examples.helpers import download_zenodo_data, setup_logger -CACHE_DIR = PROJECTROOT_DIR / "cache" / "wedowind_example_data" +CACHE_DIR = PROJECTROOT_DIR / "cache" ASSESSMENT_NAME = "wedowind_example" ANALYSIS_OUTPUT_DIR = OUTPUT_DIR / ASSESSMENT_NAME ANALYSIS_OUTPUT_DIR.mkdir(exist_ok=True, parents=True) @@ -36,23 +39,34 @@ class WeDoWindScadaColumns(Enum): Y_CTRL_NORM = "y_ctrl(normalized)" Y_TEST_NORM = "y_test(normalized)" + UPGRADE_STATUS = "upgradestatus" + WIND_SPEED = "V" + WIND_DIRECTION = "D" -class TurbineNames(Enum): +class WeDoWindTurbineNames(Enum): REF = "Ref" TEST = "Test" -class MetadataColumns(Enum): - NAME = "Name" - LATITUDE = "Latitude" - LONGITUDE = "Longitude" +class KeyDates(NamedTuple): + analysis_first_dt_utc_start: dt.datetime + upgrade_first_dt_utc_start: dt.datetime + analysis_last_dt_utc_start: dt.datetime + lt_first_dt_utc_start: dt.datetime + lt_last_dt_utc_start: dt.datetime + detrend_first_dt_utc_start: dt.datetime + detrend_last_dt_utc_start: dt.datetime + pre_first_dt_utc_start: dt.datetime + pre_last_dt_utc_start: dt.datetime + post_first_dt_utc_start: dt.datetime + post_last_dt_utc_start: dt.datetime -class WDWScadaUnpacker: - def __init__(self, scada_file_name: str, wdw_zip_file_path: Path = CACHE_DIR / ZIP_FILENAME) -> None: +class WeDoWindScadaUnpacker: + def __init__(self, scada_file_name: str, wedowind_zip_file_path: Path = CACHE_DIR / ZIP_FILENAME) -> None: self.scada_file_name = scada_file_name - self.wdw_zip_file_path = wdw_zip_file_path + self.wedowind_zip_file_path = wedowind_zip_file_path self.scada_df = None def unpack(self, rated_power_kw: float) -> pd.DataFrame: @@ -66,17 +80,19 @@ def unpack(self, rated_power_kw: float) -> pd.DataFrame: return self.scada_df def _read_raw_df(self) -> pd.DataFrame: - with zipfile.ZipFile(self.wdw_zip_file_path) as zf: - return pd.read_csv(zf.open(self.scada_file_name), parse_dates=[1], index_col=0).drop( + with zipfile.ZipFile(self.wedowind_zip_file_path) as zf: + raw_df = pd.read_csv(zf.open(self.scada_file_name), parse_dates=[1], index_col=0).drop( columns=["VcosD", "VsinD"] ) + raw_df.columns = raw_df.columns.str.replace(" ", "") + return raw_df @staticmethod def _format_scada_df(scada_df: pd.DataFrame, rated_power_kw: float) -> pd.DataFrame: scada_df[DataColumns.active_power_mean] = scada_df["normalized_power"] * rated_power_kw # map some mast data to the turbine for convenience - scada_df[DataColumns.wind_speed_mean] = scada_df["V"] - scada_df[DataColumns.yaw_angle_mean] = scada_df["D"] + scada_df[DataColumns.wind_speed_mean] = scada_df[WeDoWindScadaColumns.WIND_SPEED.value] + scada_df[DataColumns.yaw_angle_mean] = scada_df[WeDoWindScadaColumns.WIND_DIRECTION.value] # placeholder values for other required columns scada_df[DataColumns.pitch_angle_mean] = 0 scada_df[DataColumns.gen_rpm_mean] = 1000 @@ -93,7 +109,7 @@ def _construct_scada_df_test(scada_df_raw: pd.DataFrame) -> pd.DataFrame: return ( scada_df_raw.drop(columns=[WeDoWindScadaColumns.Y_CTRL_NORM.value]) .copy() - .assign(TurbineName=TurbineNames.TEST.value) + .assign(TurbineName=WeDoWindTurbineNames.TEST.value) .rename(columns={WeDoWindScadaColumns.Y_TEST_NORM.value: "normalized_power"}) ) @@ -102,15 +118,27 @@ def _construct_scada_df_ref(scada_df_raw: pd.DataFrame) -> pd.DataFrame: return ( scada_df_raw.drop(columns=[WeDoWindScadaColumns.Y_TEST_NORM.value]) .copy() - .assign(TurbineName=TurbineNames.REF.value) + .assign(TurbineName=WeDoWindTurbineNames.REF.value) .rename(columns={WeDoWindScadaColumns.Y_CTRL_NORM.value: "normalized_power"}) ) -def make_wdw_metadata_df() -> pd.DataFrame: +class WeDoWindAnalysisConf(BaseModel): + scada_file_name: str = Field(description="e.g. 'Turbine Upgrade Dataset(Pitch Angle Pair).csv'") + wd_ranges_polluted_with_mast_shadow: list[tuple[int, int]] = Field(description="e.g. [(250, 315)]") + clip_rated_power_pp: bool + + +def download_wedowind_data_from_zenodo() -> None: + logger.info("Downloading example data from Zenodo") + # https://zenodo.org/records/5516556 + download_zenodo_data(record_id="5516556", output_dir=CACHE_DIR, filenames={ZIP_FILENAME}) + + +def create_fake_wedowind_metadata_df() -> pd.DataFrame: coords_df = pd.DataFrame( { - MetadataColumns.NAME.value: ["WT1", "WT2", "WT3", "WT4", "MAST1", "MAST2"], + "Name": ["WT1", "WT2", "WT3", "WT4", "MAST1", "MAST2"], "X": [500, 2200, 9836, 7571, 0, 9571], "Y": [9136, 9436, 0, 2050, 9836, 50], } @@ -118,21 +146,94 @@ def make_wdw_metadata_df() -> pd.DataFrame: assumed_wf_lat = 40 assumed_wf_lon = -89 m_per_deglat = 40_075_000 / 360 - coords_df[MetadataColumns.LATITUDE.value] = assumed_wf_lat + (coords_df["Y"] - coords_df["Y"].mean()) / m_per_deglat - coords_df[MetadataColumns.LONGITUDE.value] = assumed_wf_lon + (coords_df["X"] - coords_df["X"].mean()) / ( + coords_df["Latitude"] = assumed_wf_lat + (coords_df["Y"] - coords_df["Y"].mean()) / m_per_deglat + coords_df["Longitude"] = assumed_wf_lon + (coords_df["X"] - coords_df["X"].mean()) / ( m_per_deglat * math.cos(assumed_wf_lat * math.pi / 180) ) - return coords_df.loc[ - :, [MetadataColumns.NAME.value, MetadataColumns.LATITUDE.value, MetadataColumns.LONGITUDE.value] - ].assign(TimeZone="UTC", TimeSpanMinutes=10, TimeFormat="Start") + return coords_df.loc[:, ["Name", "Latitude", "Longitude"]].assign( + TimeZone="UTC", TimeSpanMinutes=10, TimeFormat="Start" + ) + + +def create_fake_wedowind_reanalysis_dataset(scada_df_index_minimum: dt.datetime) -> ReanalysisDataset: + rng = np.random.default_rng(0) + rows = 100 + return ReanalysisDataset( + id="dummy_reanalysis_data", + data=pd.DataFrame( + data={ + "100_m_hws_mean_mps": rng.uniform(5, 10, rows), + "100_m_hwd_mean_deg-n_true": rng.uniform(0, 360, rows), + }, + index=pd.DatetimeIndex(pd.date_range(start=scada_df_index_minimum, periods=rows, freq="h", tz="UTC")), + ), + ) -def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, rotor_diameter_m: int) -> Path: +def establish_wedowind_key_dates(scada_df: pd.DataFrame) -> KeyDates: """ - It is unclear how the scada data is related to the metadata so look for wakes in the data + Extracts important dates from the SCADA data. These dates may then be used in the WindUpConfig. + + Args: + scada_df: - Returns: None (but displays plots) + Returns: tuple of dates that may be passed to the WindUpConfig + """ + analysis_first_dt_utc_start = scada_df.index.min() + upgrade_first_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min() + analysis_last_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max() + lt_first_dt_utc_start = scada_df.index.min() + lt_last_dt_utc_start = ( + scada_df.index.min() + + ( + scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max() + - scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min() + ) + - pd.Timedelta(minutes=10) + ) + detrend_first_dt_utc_start = scada_df.index.min() + detrend_last_dt_utc_start = ( + scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min() + - pd.DateOffset(weeks=1) + - pd.Timedelta(minutes=10) + ) + pre_first_dt_utc_start = scada_df.index.min() + pre_last_dt_utc_start = ( + scada_df.index.min() + + ( + scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max() + - scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min() + ) + - pd.Timedelta(minutes=10) + ) + post_first_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min() + post_last_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max() + + return KeyDates( + analysis_first_dt_utc_start=analysis_first_dt_utc_start, + upgrade_first_dt_utc_start=upgrade_first_dt_utc_start, + analysis_last_dt_utc_start=analysis_last_dt_utc_start, + lt_first_dt_utc_start=lt_first_dt_utc_start, + lt_last_dt_utc_start=lt_last_dt_utc_start, + detrend_first_dt_utc_start=detrend_first_dt_utc_start, + detrend_last_dt_utc_start=detrend_last_dt_utc_start, + pre_first_dt_utc_start=pre_first_dt_utc_start, + pre_last_dt_utc_start=pre_last_dt_utc_start, + post_first_dt_utc_start=post_first_dt_utc_start, + post_last_dt_utc_start=post_last_dt_utc_start, + ) + + +def generate_custom_exploratory_plots( + scada_df: pd.DataFrame, assumed_rated_power_kw: float, rotor_diameter_m: int +) -> Path: + """ + These custom plots are to help with SCADA data exploration. + It was created because it was unclear how the SCADA data is related to the metadata so helped in looking for wakes + in the data. + + Returns: path to directory containing the plots """ custom_plots_dir_root = ANALYSIS_OUTPUT_DIR / "custom_plots" custom_plots_dir_timeseries = custom_plots_dir_root / "timeseries" @@ -140,7 +241,7 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto custom_plots_dir_root.mkdir(exist_ok=True, parents=True) custom_plots_dir_timeseries.mkdir(exist_ok=True) - for name, df in scada_df.groupby("TurbineName"): + for name, df in scada_df.groupby(DataColumns.turbine_name): for col in df.columns: plt.figure() plt.scatter(df.index, df[col], s=1) @@ -156,28 +257,32 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto region2_df = scada_df[(scada_df["normalized_power"] > 0.2) & (scada_df["normalized_power"] < 0.8)] # noqa PLR2004 binned_by_turbine = {} - for name, df in region2_df.groupby("TurbineName"): + for name, df in region2_df.groupby(DataColumns.turbine_name): if name == "Mast": continue # find mean normalized_power and V binned by D _df = df.copy() - _df["D_bin"] = pd.cut(_df["D"], bins=range(0, 361, 5)) - binned = _df.groupby("D_bin", observed=False)[["D", "normalized_power", "V"]].mean() + _df[f"{WeDoWindScadaColumns.WIND_DIRECTION.value}_bin"] = pd.cut( + _df[WeDoWindScadaColumns.WIND_DIRECTION.value], bins=range(0, 361, 5) + ) + binned = _df.groupby(f"{WeDoWindScadaColumns.WIND_DIRECTION.value}_bin", observed=False)[ + [WeDoWindScadaColumns.WIND_DIRECTION.value, "normalized_power", "V"] + ].mean() binned_by_turbine[name] = binned plt.figure() plt.plot( - binned["D"], + binned[WeDoWindScadaColumns.WIND_DIRECTION.value], calc_cp( power_kw=binned["normalized_power"] * assumed_rated_power_kw, - ws_ms=binned["V"], + ws_ms=binned[WeDoWindScadaColumns.WIND_SPEED.value], air_density_kgpm3=1.2, rotor_diameter_m=rotor_diameter_m, ), marker=".", ) - title = f"{name} Cp vs D" + title = f"{name} Cp vs {WeDoWindScadaColumns.WIND_DIRECTION.value}" plt.title(title) - plt.xlabel("D") + plt.xlabel(WeDoWindScadaColumns.WIND_DIRECTION.value) plt.ylabel("Cp") plt.xticks(rotation=90) plt.grid() @@ -188,10 +293,10 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto plt.figure() for name, binned in binned_by_turbine.items(): plt.plot( - binned["D"], + binned[WeDoWindScadaColumns.WIND_DIRECTION.value], calc_cp( power_kw=binned["normalized_power"] * assumed_rated_power_kw, - ws_ms=binned["V"], + ws_ms=binned[WeDoWindScadaColumns.WIND_SPEED.value], air_density_kgpm3=1.2, rotor_diameter_m=rotor_diameter_m, ), @@ -199,9 +304,9 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto marker=".", ) plt.ylim(0.2, 0.7) - title = "Cp vs D" + title = f"Cp vs {WeDoWindScadaColumns.WIND_DIRECTION.value}" plt.title(title) - plt.xlabel("D") + plt.xlabel(WeDoWindScadaColumns.WIND_DIRECTION.value) plt.ylabel("Cp") plt.xticks(rotation=90) plt.grid() @@ -213,49 +318,59 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto return custom_plots_dir_root -def download_wdw_data_from_zenodo() -> None: - logger.info("Downloading example data from Zenodo") - download_zenodo_data(record_id="5516556", output_dir=CACHE_DIR, filenames={ZIP_FILENAME}) - download_zenodo_data( - record_id="5516552", output_dir=CACHE_DIR, filenames={"Inland_Offshore_Wind_Farm_Dataset1.zip"} - ) - - def main() -> None: - download_wdw_data_from_zenodo() + generate_custom_plots = False # whether to create exploratory scada data plots + analysis_name = "Pitch Angle" # "Pitch Angle" or "Vortex Generator" + + download_wedowind_data_from_zenodo() assumed_rated_power_kw = 1500 rotor_diameter_m = 80 cutout_ws_mps = 20 - scada_file_name = "Turbine Upgrade Dataset(Pitch Angle Pair).csv" # or Turbine Upgrade Dataset(VG Pair).csv - logger.info("Preprocessing turbine SCADA data") - scada_df = WDWScadaUnpacker(scada_file_name=scada_file_name).unpack(rated_power_kw=assumed_rated_power_kw) - metadata_df = make_wdw_metadata_df() + analysis_specific_config = { + "Pitch Angle": WeDoWindAnalysisConf( + scada_file_name="Turbine Upgrade Dataset(Pitch Angle Pair).csv", + wd_ranges_polluted_with_mast_shadow=[(70, 150)], # determined by inspecting the custom plots + clip_rated_power_pp=False, + ), + "Vortex Generator": WeDoWindAnalysisConf( + scada_file_name="Turbine Upgrade Dataset(VG Pair).csv", + wd_ranges_polluted_with_mast_shadow=[(30, 115), (250, 315)], # determined by inspecting the custom plots + clip_rated_power_pp=True, # Vortex Generators are not expected to increase rated power + ), + } + if analysis_name not in analysis_specific_config: + msg = f"analysis_name must be one of {list(analysis_specific_config.keys())}" + raise ValueError(msg) - run_custom_plots( - scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m + analysis_conf = analysis_specific_config[analysis_name] + + logger.info("Preprocessing turbine SCADA data") + scada_df = WeDoWindScadaUnpacker(scada_file_name=analysis_conf.scada_file_name).unpack( + rated_power_kw=assumed_rated_power_kw ) + metadata_df = create_fake_wedowind_metadata_df() + + if generate_custom_plots: + generate_custom_exploratory_plots( + scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m + ) # based on the above I think the objects are MAST1, test=WT1 and ref=WT2 scada_df = scada_df.replace( - {"TurbineName": {TurbineNames.TEST.value: "WT1", TurbineNames.REF.value: "WT2", "Mast": "MAST1"}} + { + DataColumns.turbine_name: { + WeDoWindTurbineNames.TEST.value: "WT1", + WeDoWindTurbineNames.REF.value: "WT2", + "Mast": "MAST1", + } + } ) # drop everything except the turbines from the metadata metadata_df = metadata_df[metadata_df["Name"].isin(["WT1", "WT2"])] - - rng = np.random.default_rng(0) - rows = 100 - reanalysis_dataset = ReanalysisDataset( - id="dummy_reanalysis_data", - data=pd.DataFrame( - data={ - "100_m_hws_mean_mps": rng.uniform(5, 10, rows), - "100_m_hwd_mean_deg-n_true": rng.uniform(0, 360, rows), - }, - index=pd.DatetimeIndex(pd.date_range(start=scada_df.index.min(), periods=rows, freq="h", tz="UTC")), - ), - ) + # Reanalysis data is required by WindUp, therefore create a fake reanalysis object + reanalysis_dataset = create_fake_wedowind_reanalysis_dataset(scada_df_index_minimum=scada_df.index.min()) # Construct wind-up Configurations @@ -274,56 +389,59 @@ def main() -> None: for x in ["WT1", "WT2"] } + key_dates = establish_wedowind_key_dates(scada_df=scada_df) + cfg = WindUpConfig( - assessment_name=ASSESSMENT_NAME, - ref_wd_filter=[150, 240], # apparent wake free sector + assessment_name=analysis_name, + ref_wd_filter=[150, 240], # apparent wake free sector (determined by inspecting the custom plots) use_lt_distribution=False, - out_dir=OUTPUT_DIR / ASSESSMENT_NAME, + out_dir=OUTPUT_DIR / analysis_name, test_wtgs=[wtg_map[x] for x in ["WT1"]], ref_wtgs=[wtg_map[x] for x in ["WT2"]], - analysis_first_dt_utc_start=scada_df.index.min(), - upgrade_first_dt_utc_start=scada_df[scada_df["upgrade status"] > 0].index.min(), - analysis_last_dt_utc_start=scada_df[scada_df["upgrade status"] > 0].index.max(), years_offset_for_pre_period=1, - lt_first_dt_utc_start=scada_df.index.min(), - lt_last_dt_utc_start=scada_df.index.min() - + (scada_df[scada_df["upgrade status"] > 0].index.max() - scada_df[scada_df["upgrade status"] > 0].index.min()) - - pd.Timedelta(minutes=10), - detrend_first_dt_utc_start=scada_df.index.min(), - detrend_last_dt_utc_start=scada_df[scada_df["upgrade status"] > 0].index.min() - - pd.DateOffset(weeks=1) - - pd.Timedelta(minutes=10), years_for_lt_distribution=1, years_for_detrend=1, ws_bin_width=1.0, - asset={ - "name": "Mystery Wind Farm", - "wtgs": list(wtg_map.values()), - }, - missing_scada_data_fields=["YawAngleMin", "YawAngleMax"], + analysis_first_dt_utc_start=key_dates.analysis_first_dt_utc_start, + upgrade_first_dt_utc_start=key_dates.upgrade_first_dt_utc_start, + analysis_last_dt_utc_start=key_dates.analysis_last_dt_utc_start, + lt_first_dt_utc_start=key_dates.lt_first_dt_utc_start, + lt_last_dt_utc_start=key_dates.lt_last_dt_utc_start, + detrend_first_dt_utc_start=key_dates.detrend_first_dt_utc_start, + detrend_last_dt_utc_start=key_dates.detrend_last_dt_utc_start, + asset={"name": "Mystery Wind Farm", "wtgs": list(wtg_map.values())}, + missing_scada_data_fields=[DataColumns.yaw_angle_min, DataColumns.yaw_angle_max], prepost={ - "pre_first_dt_utc_start": scada_df.index.min(), - "pre_last_dt_utc_start": scada_df.index.min() - + ( - scada_df[scada_df["upgrade status"] > 0].index.max() - - scada_df[scada_df["upgrade status"] > 0].index.min() - ) - - pd.Timedelta(minutes=10), - "post_first_dt_utc_start": scada_df[scada_df["upgrade status"] > 0].index.min(), - "post_last_dt_utc_start": scada_df[scada_df["upgrade status"] > 0].index.max(), + "pre_first_dt_utc_start": key_dates.pre_first_dt_utc_start, + "pre_last_dt_utc_start": key_dates.pre_last_dt_utc_start, + "post_first_dt_utc_start": key_dates.post_first_dt_utc_start, + "post_last_dt_utc_start": key_dates.post_last_dt_utc_start, }, optimize_northing_corrections=False, + clip_rated_power_pp=analysis_conf.clip_rated_power_pp, ) plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots") + wd_ranges_to_exclude = analysis_conf.wd_ranges_polluted_with_mast_shadow + scada_df_for_assessment = scada_df.copy() + for wdr in wd_ranges_to_exclude: + logger.info("Filtering out wind directions between %s", wdr) + mask = (scada_df_for_assessment[DataColumns.yaw_angle_mean] >= wdr[0]) & ( + scada_df_for_assessment[DataColumns.yaw_angle_mean] <= wdr[1] + ) + scada_df_for_assessment = scada_df_for_assessment.loc[~mask, :] + + cache_assessment = CACHE_DIR / analysis_name + cache_assessment.mkdir(parents=True, exist_ok=True) + assessment_inputs = AssessmentInputs.from_cfg( cfg=cfg, plot_cfg=plot_cfg, - scada_df=scada_df[(scada_df["D"] < 70) | (scada_df["D"] > 150)], # noqa PLR2004 filter out apparent mast waked sector + scada_df=scada_df_for_assessment, # noqa PLR2004 filter out apparent mast waked sector metadata_df=metadata_df, reanalysis_datasets=[reanalysis_dataset], - cache_dir=CACHE_DIR, + cache_dir=cache_assessment, ) # Run Analysis diff --git a/wind_up/models.py b/wind_up/models.py index 54eefd5..4f3af82 100644 --- a/wind_up/models.py +++ b/wind_up/models.py @@ -269,6 +269,7 @@ class WindUpConfig(BaseModel): ) toggle: Toggle | None = None prepost: PrePost | None = None + clip_rated_power_pp: bool = Field(default=True, description="Clip rated power in Pre-Post analysis") @model_validator(mode="after") def check_years_offset_for_pre_period(self: WindUpConfig) -> WindUpConfig: diff --git a/wind_up/plots/scada_funcs_plots.py b/wind_up/plots/scada_funcs_plots.py index 4ca01ff..8f79750 100644 --- a/wind_up/plots/scada_funcs_plots.py +++ b/wind_up/plots/scada_funcs_plots.py @@ -20,12 +20,12 @@ def plot_data_coverage_heatmap(df: pd.DataFrame, plot_title: str, plot_cfg: PlotConfig) -> None: # calculate data coverage per turbine - covdf = df.groupby("TurbineName", observed=False).agg( - power=pd.NamedAgg(column="ActivePowerMean", aggfunc=lambda x: x.count() / x.size), - windspeed=pd.NamedAgg(column="WindSpeedMean", aggfunc=lambda x: x.count() / x.size), - yaw=pd.NamedAgg(column="YawAngleMean", aggfunc=lambda x: x.count() / x.size), - rpm=pd.NamedAgg(column="GenRpmMean", aggfunc=lambda x: x.count() / x.size), - pitch=pd.NamedAgg(column="PitchAngleMean", aggfunc=lambda x: x.count() / x.size), + covdf = df.groupby(DataColumns.turbine_name, observed=False).agg( + power=pd.NamedAgg(column=DataColumns.active_power_mean, aggfunc=lambda x: x.count() / x.size), + windspeed=pd.NamedAgg(column=DataColumns.wind_speed_mean, aggfunc=lambda x: x.count() / x.size), + yaw=pd.NamedAgg(column=DataColumns.yaw_angle_mean, aggfunc=lambda x: x.count() / x.size), + rpm=pd.NamedAgg(column=DataColumns.gen_rpm_mean, aggfunc=lambda x: x.count() / x.size), + pitch=pd.NamedAgg(column=DataColumns.pitch_angle_mean, aggfunc=lambda x: x.count() / x.size), ) plt.figure() @@ -40,9 +40,9 @@ def plot_data_coverage_heatmap(df: pd.DataFrame, plot_title: str, plot_cfg: Plot def calc_cf_by_turbine(scada_df: pd.DataFrame, cfg: WindUpConfig) -> pd.DataFrame: rows_per_hour = 3600 / cfg.timebase_s - cf_df = scada_df.groupby("TurbineName", observed=False).agg( - hours=pd.NamedAgg(column="TurbineName", aggfunc=lambda x: x.count() / rows_per_hour), - MWh=pd.NamedAgg(column="ActivePowerMean", aggfunc=lambda x: x.sum() / rows_per_hour / 1000), + cf_df = scada_df.groupby(DataColumns.turbine_name, observed=False).agg( + hours=pd.NamedAgg(column=DataColumns.turbine_name, aggfunc=lambda x: x.count() / rows_per_hour), + MWh=pd.NamedAgg(column=DataColumns.active_power_mean, aggfunc=lambda x: x.sum() / rows_per_hour / 1000), ) for i, rp in strict_zip( [x.name for x in cfg.asset.wtgs], @@ -105,11 +105,11 @@ def plot_ops_curves_per_ttype(cfg: WindUpConfig, df: pd.DataFrame, title_end: st def plot_ops_curves_one_ttype_or_wtg(df: pd.DataFrame, ttype_or_wtg: str, title_end: str, plot_cfg: PlotConfig) -> None: plt.figure() - plt.scatter(df["WindSpeedMean"], df["ActivePowerMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.active_power_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) plot_title = f"{ttype_or_wtg} power curve {title_end}" plt.title(plot_title) - plt.xlabel("WindSpeedMean [m/s]") - plt.ylabel("ActivePowerMean [kW]") + plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]") + plt.ylabel(f"{DataColumns.active_power_mean} [kW]") plt.grid() if plot_cfg.show_plots: plt.show() @@ -122,27 +122,27 @@ def plot_ops_curves_one_ttype_or_wtg(df: pd.DataFrame, ttype_or_wtg: str, title_ # plot rpm and pitch vs power and wind speed in a 2 by 2 grid plt.figure(figsize=(12, 8)) plt.subplot(2, 2, 1) - plt.scatter(df["ActivePowerMean"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) - plt.xlabel("ActivePowerMean [kW]") - plt.ylabel("GenRpmMean [RPM]") + plt.scatter(df[DataColumns.active_power_mean], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.xlabel(f"{DataColumns.active_power_mean} [kW]") + plt.ylabel(f"{DataColumns.gen_rpm_mean} [RPM]") plt.grid() plt.subplot(2, 2, 2) - plt.scatter(df["WindSpeedMean"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) - plt.xlabel("WindSpeedMean [m/s]") - plt.ylabel("GenRpmMean [RPM]") + plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]") + plt.ylabel(f"{DataColumns.gen_rpm_mean} [RPM]") plt.grid() plt.subplot(2, 2, 3) - plt.scatter(df["ActivePowerMean"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) - plt.xlabel("ActivePowerMean [kW]") - plt.ylabel("PitchAngleMean [deg]") + plt.scatter(df[DataColumns.active_power_mean], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.xlabel(f"{DataColumns.active_power_mean} [kW]") + plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]") plt.grid() plt.subplot(2, 2, 4) - plt.scatter(df["WindSpeedMean"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) - plt.xlabel("WindSpeedMean [m/s]") - plt.ylabel("PitchAngleMean [deg]") + plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]") + plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]") plt.grid() plot_title = f"{ttype_or_wtg} ops curves, {title_end}" @@ -362,39 +362,39 @@ def plot_filter_rpm_and_pt_curve_one_ttype_or_wtg( ) -> None: plt.figure(figsize=(12, 8)) plt.subplot(2, 2, 1) - plt.scatter(df["pw_clipped"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.scatter(df["pw_clipped"], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) x = [rpm_v_pw_curve.index[0].left] + [x.mid for x in rpm_v_pw_curve.index] + [rpm_v_pw_curve.index[-1].right] y = [rpm_v_pw_curve["y_limit"].iloc[0], *list(rpm_v_pw_curve["y_limit"]), rpm_v_pw_curve["y_limit"].iloc[-1]] plt.plot(x, y, color="red") plt.xlabel("pw_clipped [kW]") - plt.ylabel("GenRpmMean [deg]") + plt.ylabel(f"{DataColumns.gen_rpm_mean} [deg]") plt.grid() plt.subplot(2, 2, 2) - plt.scatter(df["WindSpeedMean"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) x = [rpm_v_ws_curve.index[0].left] + [x.mid for x in rpm_v_ws_curve.index] + [rpm_v_ws_curve.index[-1].right] y = [rpm_v_ws_curve["y_limit"].iloc[0], *list(rpm_v_ws_curve["y_limit"]), rpm_v_ws_curve["y_limit"].iloc[-1]] plt.plot(x, y, color="red") - plt.xlabel("WindSpeedMean [m/s]") - plt.ylabel("GenRpmMean [deg]") + plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]") + plt.ylabel(f"{DataColumns.gen_rpm_mean} [deg]") plt.grid() plt.subplot(2, 2, 3) - plt.scatter(df["pw_clipped"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.scatter(df["pw_clipped"], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) x = [pt_v_pw_curve.index[0].left] + [x.mid for x in pt_v_pw_curve.index] + [pt_v_pw_curve.index[-1].right] y = [pt_v_pw_curve["y_limit"].iloc[0], *list(pt_v_pw_curve["y_limit"]), pt_v_pw_curve["y_limit"].iloc[-1]] plt.plot(x, y, color="red") plt.xlabel("pw_clipped [kW]") - plt.ylabel("PitchAngleMean [deg]") + plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]") plt.grid() plt.subplot(2, 2, 4) - plt.scatter(df["WindSpeedMean"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA) + plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA) x = [pt_v_ws_curve.index[0].left] + [x.mid for x in pt_v_ws_curve.index] + [pt_v_ws_curve.index[-1].right] y = [pt_v_ws_curve["y_limit"].iloc[0], *list(pt_v_ws_curve["y_limit"]), pt_v_ws_curve["y_limit"].iloc[-1]] plt.plot(x, y, color="red") - plt.xlabel("WindSpeedMean [m/s]") - plt.ylabel("PitchAngleMean [deg]") + plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]") + plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]") plt.grid() plot_title = f"{ttype_or_wtg} rpm and pitch curve filters" diff --git a/wind_up/pp_analysis.py b/wind_up/pp_analysis.py index de2f109..28f439f 100644 --- a/wind_up/pp_analysis.py +++ b/wind_up/pp_analysis.py @@ -58,7 +58,7 @@ def pp_raw_df( def cook_pp( - pp_df: pd.DataFrame, *, pre_or_post: str, ws_bin_width: float, rated_power: float, clip_to_rated: bool = True + pp_df: pd.DataFrame, *, pre_or_post: str, ws_bin_width: float, rated_power: float, clip_to_rated: bool ) -> pd.DataFrame: pp_df = pp_df.copy() @@ -89,12 +89,10 @@ def cook_pp( rated_ws = pp_df.loc[pp_df[raw_pw_col] >= rated_power * 0.995, "bin_mid"].min() + 1 empty_rated_bins_fill_value = rated_power if not clip_to_rated: - try: + with contextlib.suppress(IndexError): empty_rated_bins_fill_value = pp_df.loc[ (pp_df["bin_mid"] >= rated_ws) & ~pp_df[pw_col].isna(), pw_col ].iloc[-1] - except IndexError: - pass pp_df.loc[(pp_df["bin_mid"] >= rated_ws) & pp_df[pw_col].isna(), pw_col] = empty_rated_bins_fill_value pp_df[pw_sem_col] = pp_df[pw_sem_col].ffill() @@ -181,14 +179,18 @@ def pre_post_pp_analysis( ) pre_pp_df = cook_pp( - pp_df=pre_pp_df, pre_or_post="pre", ws_bin_width=cfg.ws_bin_width, rated_power=rated_power, clip_to_rated=False + pp_df=pre_pp_df, + pre_or_post="pre", + ws_bin_width=cfg.ws_bin_width, + rated_power=rated_power, + clip_to_rated=cfg.clip_rated_power_pp, ) post_pp_df = cook_pp( pp_df=post_pp_df, pre_or_post="post", ws_bin_width=cfg.ws_bin_width, rated_power=rated_power, - clip_to_rated=False, + clip_to_rated=cfg.clip_rated_power_pp, ) pp_df = pre_pp_df.merge( post_pp_df[[x for x in post_pp_df.columns if x not in pre_pp_df.columns]],