From a2d75db7a7275498a64752582fa1facc79d2930f Mon Sep 17 00:00:00 2001
From: Samuel Naylor <samuel.naylor@res-group.com>
Date: Thu, 12 Sep 2024 15:06:09 +0100
Subject: [PATCH] refactor: improve readability and support both WeDoWind
 datasets

- Support both Pitch Angle Pair and Vortex Generator Pair WeDoWind
datasets
- Support not clipping rated power
- Improve readability and replace some hard-coded column name strings
---
 examples/helpers.py                |    4 +-
 examples/wdw_example.ipynb         | 1287 ----------------------------
 examples/wedowind_example.py       |  310 ++++---
 wind_up/models.py                  |    1 +
 wind_up/plots/scada_funcs_plots.py |   68 +-
 wind_up/pp_analysis.py             |   14 +-
 6 files changed, 259 insertions(+), 1425 deletions(-)
 delete mode 100644 examples/wdw_example.ipynb

diff --git a/examples/helpers.py b/examples/helpers.py
index ca49d38..cd14666 100644
--- a/examples/helpers.py
+++ b/examples/helpers.py
@@ -15,7 +15,7 @@
 
 
 def setup_logger(log_fpath: Path, level: int = logging.INFO) -> None:
-    log_formatter_file = logging.Formatter("%(asctime)s [%(levelname)-5.5s]  %(message)s")
+    log_formatter_file = logging.Formatter("%(asctime)s [%(levelname)-8s]  %(message)s")
     root_logger = logging.getLogger()
     root_logger.setLevel(level)
 
@@ -23,7 +23,7 @@ def setup_logger(log_fpath: Path, level: int = logging.INFO) -> None:
     file_handler.setFormatter(log_formatter_file)
     root_logger.addHandler(file_handler)
 
-    log_formatter_console = logging.Formatter("%(message)s")
+    log_formatter_console = logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s")
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(log_formatter_console)
     root_logger.addHandler(console_handler)
diff --git a/examples/wdw_example.ipynb b/examples/wdw_example.ipynb
deleted file mode 100644
index 3599bdc..0000000
--- a/examples/wdw_example.ipynb
+++ /dev/null
@@ -1,1287 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a16a48dc-14f9-4ed3-98c2-6b7198cdecdf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# ruff: noqa: F405"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "8de26d60-249e-46e9-ae5e-d4f6ed38dc2f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%load_ext autoreload\n",
-    "%autoreload 2\n",
-    "\n",
-    "%matplotlib inline"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "ea667b9b-19e7-455b-909a-1c3bc6e5a2f6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from wedowind_example import *  # noqa: F403"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "c99e2a22-d632-4f66-8fa8-1294614a657b",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Downloading example data from Zenodo\n",
-      "File C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\cache\\wedowind_example_data\\Turbine_Upgrade_Dataset.zip already exists. Skipping download.\n",
-      "File C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\cache\\wedowind_example_data\\Inland_Offshore_Wind_Farm_Dataset1.zip already exists. Skipping download.\n"
-     ]
-    }
-   ],
-   "source": [
-    "download_wdw_data_from_zenodo()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "61992e29-545d-4d81-b5cc-9279517ec9cd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "assumed_rated_power_kw = 1500\n",
-    "rotor_diameter_m = 80\n",
-    "cutout_ws_mps = 20\n",
-    "scada_file_name = \"Turbine Upgrade Dataset(Pitch Angle Pair).csv\"  # or Turbine Upgrade Dataset(VG Pair).csv"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "7009cf71-3418-435a-a983-ee194453b465",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Preprocessing turbine SCADA data\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>upgrade status</th>\n",
-       "      <th>V</th>\n",
-       "      <th>D</th>\n",
-       "      <th>rho</th>\n",
-       "      <th>S</th>\n",
-       "      <th>I</th>\n",
-       "      <th>normalized_power</th>\n",
-       "      <th>TurbineName</th>\n",
-       "      <th>ActivePowerMean</th>\n",
-       "      <th>WindSpeedMean</th>\n",
-       "      <th>YawAngleMean</th>\n",
-       "      <th>PitchAngleMean</th>\n",
-       "      <th>GenRpmMean</th>\n",
-       "      <th>ShutdownDuration</th>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>TimeStamp_StartFormat</th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "      <th></th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2010-07-30 22:40:00+00:00</th>\n",
-       "      <td>0</td>\n",
-       "      <td>7.96</td>\n",
-       "      <td>138.9</td>\n",
-       "      <td>1.140224</td>\n",
-       "      <td>0.266512</td>\n",
-       "      <td>0.090452</td>\n",
-       "      <td>0.393152</td>\n",
-       "      <td>Test</td>\n",
-       "      <td>589.727273</td>\n",
-       "      <td>7.96</td>\n",
-       "      <td>138.9</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1000</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-30 22:50:00+00:00</th>\n",
-       "      <td>0</td>\n",
-       "      <td>8.19</td>\n",
-       "      <td>140.6</td>\n",
-       "      <td>1.140522</td>\n",
-       "      <td>0.286167</td>\n",
-       "      <td>0.083028</td>\n",
-       "      <td>0.457455</td>\n",
-       "      <td>Test</td>\n",
-       "      <td>686.181817</td>\n",
-       "      <td>8.19</td>\n",
-       "      <td>140.6</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1000</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-30 23:00:00+00:00</th>\n",
-       "      <td>0</td>\n",
-       "      <td>7.20</td>\n",
-       "      <td>139.3</td>\n",
-       "      <td>1.140771</td>\n",
-       "      <td>0.339321</td>\n",
-       "      <td>0.098611</td>\n",
-       "      <td>0.382121</td>\n",
-       "      <td>Test</td>\n",
-       "      <td>573.181818</td>\n",
-       "      <td>7.20</td>\n",
-       "      <td>139.3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1000</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-30 23:10:00+00:00</th>\n",
-       "      <td>0</td>\n",
-       "      <td>6.81</td>\n",
-       "      <td>137.4</td>\n",
-       "      <td>1.141186</td>\n",
-       "      <td>0.375815</td>\n",
-       "      <td>0.101322</td>\n",
-       "      <td>0.282182</td>\n",
-       "      <td>Test</td>\n",
-       "      <td>423.272727</td>\n",
-       "      <td>6.81</td>\n",
-       "      <td>137.4</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1000</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-30 23:20:00+00:00</th>\n",
-       "      <td>0</td>\n",
-       "      <td>5.09</td>\n",
-       "      <td>137.5</td>\n",
-       "      <td>1.141464</td>\n",
-       "      <td>0.303472</td>\n",
-       "      <td>0.165029</td>\n",
-       "      <td>0.127212</td>\n",
-       "      <td>Test</td>\n",
-       "      <td>190.818182</td>\n",
-       "      <td>5.09</td>\n",
-       "      <td>137.5</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1000</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                           upgrade status     V      D       rho         S  \\\n",
-       "TimeStamp_StartFormat                                                        \n",
-       "2010-07-30 22:40:00+00:00               0  7.96  138.9  1.140224  0.266512   \n",
-       "2010-07-30 22:50:00+00:00               0  8.19  140.6  1.140522  0.286167   \n",
-       "2010-07-30 23:00:00+00:00               0  7.20  139.3  1.140771  0.339321   \n",
-       "2010-07-30 23:10:00+00:00               0  6.81  137.4  1.141186  0.375815   \n",
-       "2010-07-30 23:20:00+00:00               0  5.09  137.5  1.141464  0.303472   \n",
-       "\n",
-       "                                  I  normalized_power TurbineName  \\\n",
-       "TimeStamp_StartFormat                                               \n",
-       "2010-07-30 22:40:00+00:00  0.090452          0.393152        Test   \n",
-       "2010-07-30 22:50:00+00:00  0.083028          0.457455        Test   \n",
-       "2010-07-30 23:00:00+00:00  0.098611          0.382121        Test   \n",
-       "2010-07-30 23:10:00+00:00  0.101322          0.282182        Test   \n",
-       "2010-07-30 23:20:00+00:00  0.165029          0.127212        Test   \n",
-       "\n",
-       "                           ActivePowerMean  WindSpeedMean  YawAngleMean  \\\n",
-       "TimeStamp_StartFormat                                                     \n",
-       "2010-07-30 22:40:00+00:00       589.727273           7.96         138.9   \n",
-       "2010-07-30 22:50:00+00:00       686.181817           8.19         140.6   \n",
-       "2010-07-30 23:00:00+00:00       573.181818           7.20         139.3   \n",
-       "2010-07-30 23:10:00+00:00       423.272727           6.81         137.4   \n",
-       "2010-07-30 23:20:00+00:00       190.818182           5.09         137.5   \n",
-       "\n",
-       "                           PitchAngleMean  GenRpmMean  ShutdownDuration  \n",
-       "TimeStamp_StartFormat                                                    \n",
-       "2010-07-30 22:40:00+00:00               0        1000                 0  \n",
-       "2010-07-30 22:50:00+00:00               0        1000                 0  \n",
-       "2010-07-30 23:00:00+00:00               0        1000                 0  \n",
-       "2010-07-30 23:10:00+00:00               0        1000                 0  \n",
-       "2010-07-30 23:20:00+00:00               0        1000                 0  "
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "logger.info(\"Preprocessing turbine SCADA data\")\n",
-    "scada_df = WDWScadaUnpacker(scada_file_name=scada_file_name).unpack(rated_power_kw=assumed_rated_power_kw)\n",
-    "scada_df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "c0922049-6159-41d8-ba4b-94affe82b296",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Name</th>\n",
-       "      <th>Latitude</th>\n",
-       "      <th>Longitude</th>\n",
-       "      <th>TimeZone</th>\n",
-       "      <th>TimeSpanMinutes</th>\n",
-       "      <th>TimeFormat</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>WT1</td>\n",
-       "      <td>40.036394</td>\n",
-       "      <td>-89.052141</td>\n",
-       "      <td>UTC</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Start</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>WT2</td>\n",
-       "      <td>40.039089</td>\n",
-       "      <td>-89.032205</td>\n",
-       "      <td>UTC</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Start</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>WT3</td>\n",
-       "      <td>39.954324</td>\n",
-       "      <td>-88.942660</td>\n",
-       "      <td>UTC</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Start</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>WT4</td>\n",
-       "      <td>39.972739</td>\n",
-       "      <td>-88.969221</td>\n",
-       "      <td>UTC</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Start</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>MAST1</td>\n",
-       "      <td>40.042682</td>\n",
-       "      <td>-89.058004</td>\n",
-       "      <td>UTC</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Start</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    Name   Latitude  Longitude TimeZone  TimeSpanMinutes TimeFormat\n",
-       "0    WT1  40.036394 -89.052141      UTC               10      Start\n",
-       "1    WT2  40.039089 -89.032205      UTC               10      Start\n",
-       "2    WT3  39.954324 -88.942660      UTC               10      Start\n",
-       "3    WT4  39.972739 -88.969221      UTC               10      Start\n",
-       "4  MAST1  40.042682 -89.058004      UTC               10      Start"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "metadata_df = make_wdw_metadata_df()\n",
-    "metadata_df.head()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "3734d8fd-e4c8-4e5e-820d-a95589998c77",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Custom plots saved to directory: C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\output\\wedowind_example\\custom_plots\n",
-      "Custom plots saved to directory: C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\output\\wedowind_example\\custom_plots\n"
-     ]
-    }
-   ],
-   "source": [
-    "run_custom_plots(scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "774b80d0-476f-4a86-a774-b6aca2462181",
-   "metadata": {},
-   "source": [
-    "Amend dataframes based on reviewing the Custom Plots."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "3685f588-060d-4209-aadc-5f87f606887e",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# based on the above I think the objects are MAST1, test=WT1 and ref=WT2\n",
-    "scada_df = scada_df.replace(\n",
-    "    {\"TurbineName\": {TurbineNames.TEST.value: \"WT1\", TurbineNames.REF.value: \"WT2\", \"Mast\": \"MAST1\"}}\n",
-    ")\n",
-    "# drop everything except the turbines from the metadata\n",
-    "metadata_df = metadata_df[metadata_df[\"Name\"].isin([\"WT1\", \"WT2\"])]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a346cc79-1012-4d24-b340-ae4115f34fe4",
-   "metadata": {},
-   "source": [
-    "### Construct Reanalysis"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "47ed234d-5c55-4165-902f-f808408f82a1",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>100_m_hws_mean_mps</th>\n",
-       "      <th>100_m_hwd_mean_deg-n_true</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>2010-07-30 22:40:00+00:00</th>\n",
-       "      <td>8.184808</td>\n",
-       "      <td>172.795653</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-30 23:40:00+00:00</th>\n",
-       "      <td>6.348934</td>\n",
-       "      <td>83.654251</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-31 00:40:00+00:00</th>\n",
-       "      <td>5.204868</td>\n",
-       "      <td>288.677008</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-31 01:40:00+00:00</th>\n",
-       "      <td>5.082638</td>\n",
-       "      <td>332.470858</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2010-07-31 02:40:00+00:00</th>\n",
-       "      <td>9.066351</td>\n",
-       "      <td>95.806898</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                           100_m_hws_mean_mps  100_m_hwd_mean_deg-n_true\n",
-       "2010-07-30 22:40:00+00:00            8.184808                 172.795653\n",
-       "2010-07-30 23:40:00+00:00            6.348934                  83.654251\n",
-       "2010-07-31 00:40:00+00:00            5.204868                 288.677008\n",
-       "2010-07-31 01:40:00+00:00            5.082638                 332.470858\n",
-       "2010-07-31 02:40:00+00:00            9.066351                  95.806898"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from wind_up.reanalysis_data import ReanalysisDataset\n",
-    "\n",
-    "rng = np.random.default_rng(0)\n",
-    "rows = 100\n",
-    "reanalysis_dataset = ReanalysisDataset(\n",
-    "    id=\"dummy_reanalysis_data\",\n",
-    "    data=pd.DataFrame(\n",
-    "        data={\n",
-    "            \"100_m_hws_mean_mps\": rng.uniform(5, 10, rows),\n",
-    "            \"100_m_hwd_mean_deg-n_true\": rng.uniform(0, 360, rows),\n",
-    "        },\n",
-    "        index=pd.DatetimeIndex(pd.date_range(start=scada_df.index.min(), periods=rows, freq=\"h\", tz=\"UTC\")),\n",
-    "    ),\n",
-    ")\n",
-    "\n",
-    "reanalysis_dataset.data.head()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d989a0a1-4b3f-4e9d-b3f0-fa4061111616",
-   "metadata": {},
-   "source": [
-    "# Construct `wind-up` Configuration"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6aa9d215-e58b-409b-a1ab-bb5eb1739ebf",
-   "metadata": {},
-   "source": [
-    "## Wind Farm Config"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "63e01414-8585-4cf8-9169-c3229eaa1f2e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "loaded WindUpConfig assessment_name: wedowind_example\n",
-      "loaded WindUpConfig assessment_name: wedowind_example\n",
-      "loaded WindUpConfig assessment_name: wedowind_example\n",
-      "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
-      "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
-      "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
-      "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n",
-      "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n",
-      "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n",
-      "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
-      "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
-      "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
-      "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n",
-      "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n",
-      "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "WindUpConfig(assessment_name='wedowind_example', timebase_s=600, ignore_turbine_anemometer_data=False, require_test_wake_free=False, require_ref_wake_free=False, detrend_min_hours=24, ref_wd_filter=[150.0, 240.0], ref_hod_filter=None, filter_all_test_wtgs_together=False, use_lt_distribution=False, use_test_wtg_lt_distribution=True, out_dir=WindowsPath('C:/Users/snaylor/Documents/GitHub/wind-up/output/wedowind_example'), test_wtgs=[Turbine(name='WT1', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], ref_wtgs=[Turbine(name='WT2', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], ref_super_wtgs=[], non_wtg_ref_names=[], upgrade_first_dt_utc_start=Timestamp('2011-04-25 21:50:00+0000', tz='UTC'), analysis_last_dt_utc_start=Timestamp('2011-06-25 18:30:00+0000', tz='UTC'), analysis_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), lt_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), lt_last_dt_utc_start=Timestamp('2010-09-29 19:10:00+0000', tz='UTC'), detrend_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), detrend_last_dt_utc_start=Timestamp('2011-04-18 21:40:00+0000', tz='UTC'), years_offset_for_pre_period=1, years_for_lt_distribution=1, years_for_detrend=1, ws_bin_width=1.0, bootstrap_runs_override=None, reanalysis_method='node_with_best_ws_corr', missing_scada_data_fields=['YawAngleMin', 'YawAngleMax'], asset=Asset(name='Mystery Wind Farm', wtgs=[Turbine(name='WT1', turbine_type=TurbineType(turbine_type='unknown 
turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan), Turbine(name='WT2', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], masts_and_lidars=[]), exclusion_periods_utc=[], yaw_data_exclusions_utc=[], optimize_northing_corrections=False, northing_corrections_utc=[], toggle=None, prepost=PrePost(pre_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), pre_last_dt_utc_start=Timestamp('2010-09-29 19:10:00+0000', tz='UTC'), post_first_dt_utc_start=Timestamp('2011-04-25 21:50:00+0000', tz='UTC'), post_last_dt_utc_start=Timestamp('2011-06-25 18:30:00+0000', tz='UTC')))"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "wtg_map = {\n",
-    "    x: {\n",
-    "        \"name\": x,\n",
-    "        \"turbine_type\": {\n",
-    "            \"turbine_type\": \"unknown turbine type\",\n",
-    "            \"rotor_diameter_m\": rotor_diameter_m,\n",
-    "            \"rated_power_kw\": assumed_rated_power_kw,\n",
-    "            \"cutout_ws_mps\": cutout_ws_mps,\n",
-    "            \"normal_operation_pitch_range\": (-10.0, 35.0),\n",
-    "            \"normal_operation_genrpm_range\": (0, 2000.0),\n",
-    "        },\n",
-    "    }\n",
-    "    for x in [\"WT1\", \"WT2\"]\n",
-    "}\n",
-    "\n",
-    "cfg = WindUpConfig(\n",
-    "    assessment_name=ASSESSMENT_NAME,\n",
-    "    ref_wd_filter=[150, 240],  # apparent wake free sector\n",
-    "    use_lt_distribution=False,\n",
-    "    out_dir=OUTPUT_DIR / ASSESSMENT_NAME,\n",
-    "    test_wtgs=[wtg_map[x] for x in [\"WT1\"]],\n",
-    "    ref_wtgs=[wtg_map[x] for x in [\"WT2\"]],\n",
-    "    analysis_first_dt_utc_start=scada_df.index.min(),\n",
-    "    upgrade_first_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.min(),\n",
-    "    analysis_last_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.max(),\n",
-    "    years_offset_for_pre_period=1,\n",
-    "    lt_first_dt_utc_start=scada_df.index.min(),\n",
-    "    lt_last_dt_utc_start=scada_df.index.min()\n",
-    "    + (scada_df[scada_df[\"upgrade status\"] > 0].index.max() - scada_df[scada_df[\"upgrade status\"] > 0].index.min())\n",
-    "    - pd.Timedelta(minutes=10),\n",
-    "    detrend_first_dt_utc_start=scada_df.index.min(),\n",
-    "    detrend_last_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.min()\n",
-    "    - pd.DateOffset(weeks=1)\n",
-    "    - pd.Timedelta(minutes=10),\n",
-    "    years_for_lt_distribution=1,\n",
-    "    years_for_detrend=1,\n",
-    "    ws_bin_width=1.0,\n",
-    "    asset={\n",
-    "        \"name\": \"Mystery Wind Farm\",\n",
-    "        \"wtgs\": list(wtg_map.values()),\n",
-    "    },\n",
-    "    missing_scada_data_fields=[\"YawAngleMin\", \"YawAngleMax\"],\n",
-    "    prepost={\n",
-    "        \"pre_first_dt_utc_start\": scada_df.index.min(),\n",
-    "        \"pre_last_dt_utc_start\": scada_df.index.min()\n",
-    "        + (scada_df[scada_df[\"upgrade status\"] > 0].index.max() - scada_df[scada_df[\"upgrade status\"] > 0].index.min())\n",
-    "        - pd.Timedelta(minutes=10),\n",
-    "        \"post_first_dt_utc_start\": scada_df[scada_df[\"upgrade status\"] > 0].index.min(),\n",
-    "        \"post_last_dt_utc_start\": scada_df[scada_df[\"upgrade status\"] > 0].index.max(),\n",
-    "    },\n",
-    "    optimize_northing_corrections=False,\n",
-    ")\n",
-    "\n",
-    "cfg"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "95dd5c37-0945-4384-9019-34b0dab1ce0b",
-   "metadata": {},
-   "source": [
-    "## Plot Configuration"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "9785a31a-ed13-4249-beec-36e91d5458f6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / \"plots\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "a4944d52-3e0d-4159-a6a7-7bdf4d6b5838",
-   "metadata": {},
-   "source": [
-    "## Assessment Configs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "092491ca-55be-480e-9494-0cb919c5dae6",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "running wind_up analysis for wedowind_example\n",
-      "running wind_up analysis for wedowind_example\n",
-      "running wind_up analysis for wedowind_example\n",
-      "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
-      "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
-      "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
-      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\wind_up\\smart_data.py:99: SettingWithCopyWarning: \n",
-      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
-      "Try using .loc[row_indexer,col_indexer] = value instead\n",
-      "\n",
-      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
-      "  scada_raw[\"TurbineName\"] = scada_raw[\"TurbineName\"].astype(\"category\")\n",
-      "loaded 2 turbines, 0.5 years per turbine\n",
-      "loaded 2 turbines, 0.5 years per turbine\n",
-      "loaded 2 turbines, 0.5 years per turbine\n",
-      "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
-      "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
-      "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
-      "average capacity factor: 52.9%\n",
-      "average capacity factor: 52.9%\n",
-      "average capacity factor: 52.9%\n",
-      "top 3 capacity factor [%]:\n",
-      "+-----+------+\n",
-      "| WT2 | 53.3 |\n",
-      "| WT1 | 52.4 |\n",
-      "+-----+------+\n",
-      "top 3 capacity factor [%]:\n",
-      "+-----+------+\n",
-      "| WT2 | 53.3 |\n",
-      "| WT1 | 52.4 |\n",
-      "+-----+------+\n",
-      "top 3 capacity factor [%]:\n",
-      "+-----+------+\n",
-      "| WT2 | 53.3 |\n",
-      "| WT1 | 52.4 |\n",
-      "+-----+------+\n",
-      "bottom 3 capacity factor [%]:\n",
-      "+-----+------+\n",
-      "| WT1 | 52.4 |\n",
-      "| WT2 | 53.3 |\n",
-      "+-----+------+\n",
-      "bottom 3 capacity factor [%]:\n",
-      "+-----+------+\n",
-      "| WT1 | 52.4 |\n",
-      "| WT2 | 53.3 |\n",
-      "+-----+------+\n",
-      "bottom 3 capacity factor [%]:\n",
-      "+-----+------+\n",
-      "| WT1 | 52.4 |\n",
-      "| WT2 | 53.3 |\n",
-      "+-----+------+\n",
-      "0 rows [0.0%] of power data is missing before filtering\n",
-      "0 rows [0.0%] of power data is missing before filtering\n",
-      "0 rows [0.0%] of power data is missing before filtering\n",
-      "filter_stuck_data set 0 rows [0.0%] to NA\n",
-      "filter_stuck_data set 0 rows [0.0%] to NA\n",
-      "filter_stuck_data set 0 rows [0.0%] to NA\n",
-      "filter_bad_pw_ws set 0 rows [0.0%] to NA\n",
-      "filter_bad_pw_ws set 0 rows [0.0%] to NA\n",
-      "filter_bad_pw_ws set 0 rows [0.0%] to NA\n",
-      "filter_exclusions set 0 rows [0.0%] to NA\n",
-      "filter_exclusions set 0 rows [0.0%] to NA\n",
-      "filter_exclusions set 0 rows [0.0%] to NA\n",
-      "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n",
-      "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n",
-      "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n",
-      "filter_downtime set 0 rows [0.0%] to NA\n",
-      "filter_downtime set 0 rows [0.0%] to NA\n",
-      "filter_downtime set 0 rows [0.0%] to NA\n",
-      "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n",
-      "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n",
-      "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n",
-      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n",
-      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n",
-      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n",
-      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n",
-      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n",
-      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n",
-      "0 rows [0.0%] of power data is missing after filtering\n",
-      "0 rows [0.0%] of power data is missing after filtering\n",
-      "0 rows [0.0%] of power data is missing after filtering\n",
-      "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n",
-      "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n",
-      "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n",
-      "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n",
-      "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n",
-      "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n",
-      "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n",
-      "+-----+-----+\n",
-      "| WT1 | nan |\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n",
-      "+-----+-----+\n",
-      "| WT1 | nan |\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n",
-      "+-----+-----+\n",
-      "| WT1 | nan |\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n",
-      "+-----+-----+\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n",
-      "+-----+-----+\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n",
-      "+-----+-----+\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "no northing corrections to apply\n",
-      "no northing corrections to apply\n",
-      "no northing corrections to apply\n",
-      "applied 0 northing corrections\n",
-      "applied 0 northing corrections\n",
-      "applied 0 northing corrections\n",
-      "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n",
-      "+-----+-----+\n",
-      "| WT1 | nan |\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n",
-      "+-----+-----+\n",
-      "| WT1 | nan |\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n",
-      "+-----+-----+\n",
-      "| WT1 | nan |\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n",
-      "+-----+-----+\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n",
-      "+-----+-----+\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n",
-      "+-----+-----+\n",
-      "| WT2 | nan |\n",
-      "+-----+-----+\n",
-      "estimated rated wind speed = 14.2 m/s\n",
-      "estimated rated wind speed = 14.2 m/s\n",
-      "estimated rated wind speed = 14.2 m/s\n",
-      "estimated cut-in wind speed = 3.5 m/s\n",
-      "estimated cut-in wind speed = 3.5 m/s\n",
-      "estimated cut-in wind speed = 3.5 m/s\n",
-      "##############################################################################\n",
-      "# estimate wind speed from power\n",
-      "##############################################################################\n",
-      "##############################################################################\n",
-      "# estimate wind speed from power\n",
-      "##############################################################################\n",
-      "##############################################################################\n",
-      "# estimate wind speed from power\n",
-      "##############################################################################\n",
-      "WT1 cp correction factor = 0.99\n",
-      "WT1 cp correction factor = 0.99\n",
-      "WT1 cp correction factor = 0.99\n",
-      "WT2 cp correction factor = 1.01\n",
-      "WT2 cp correction factor = 1.01\n",
-      "WT2 cp correction factor = 1.01\n",
-      "unknown turbine type 100.0% of rows are waking\n",
-      "unknown turbine type 100.0% of rows are waking\n",
-      "unknown turbine type 100.0% of rows are waking\n",
-      "unknown turbine type 0.0% of rows are not waking\n",
-      "unknown turbine type 0.0% of rows are not waking\n",
-      "unknown turbine type 0.0% of rows are not waking\n",
-      "unknown turbine type 0.0% of rows have unknown or partial waking\n",
-      "unknown turbine type 0.0% of rows have unknown or partial waking\n",
-      "unknown turbine type 0.0% of rows have unknown or partial waking\n"
-     ]
-    }
-   ],
-   "source": [
-    "assessment_inputs = AssessmentInputs.from_cfg(\n",
-    "    cfg=cfg,\n",
-    "    plot_cfg=plot_cfg,\n",
-    "    scada_df=scada_df[(scada_df[\"D\"] < 70) | (scada_df[\"D\"] > 150)],  # noqa PLR2004 filter out apparent mast waked sector\n",
-    "    metadata_df=metadata_df,\n",
-    "    reanalysis_datasets=[reanalysis_dataset],\n",
-    "    cache_dir=CACHE_DIR,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "5f80aa62-d7cd-4ec5-a28c-3e4ffe2e006d",
-   "metadata": {},
-   "source": [
-    "# Run Analysis"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "d12503f2-49b4-473f-b98f-5cb76b636aba",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "test turbines: ['WT1']\n",
-      "test turbines: ['WT1']\n",
-      "test turbines: ['WT1']\n",
-      "test turbines: ['WT1']\n",
-      "ref list: ['WT2']\n",
-      "ref list: ['WT2']\n",
-      "ref list: ['WT2']\n",
-      "ref list: ['WT2']\n",
-      "turbines to test: ['WT1']\n",
-      "turbines to test: ['WT1']\n",
-      "turbines to test: ['WT1']\n",
-      "turbines to test: ['WT1']\n",
-      "could not calculate rolling windspeed diff\n",
-      "could not calculate rolling windspeed diff\n",
-      "could not calculate rolling windspeed diff\n",
-      "could not calculate rolling windspeed diff\n",
-      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
-      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
-      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
-      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
-      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
-      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
-      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
-      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
-      "analysing WT1 WT2, loop_counter=0\n",
-      "analysing WT1 WT2, loop_counter=0\n",
-      "analysing WT1 WT2, loop_counter=0\n",
-      "analysing WT1 WT2, loop_counter=0\n",
-      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
-      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
-      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
-      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
-      "could not calculate rolling windspeed diff\n",
-      "could not calculate rolling windspeed diff\n",
-      "could not calculate rolling windspeed diff\n",
-      "could not calculate rolling windspeed diff\n",
-      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
-      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
-      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
-      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
-      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "top 1 WT1 WT2 waking scenarios [%]:\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "+--------------+-------+\n",
-      "| none offline | 100.0 |\n",
-      "+--------------+-------+\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend applied to 91 scenario - directions\n",
-      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "removed 0 bad detrend results\n",
-      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
-      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
-      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
-      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
-      "\n",
-      "results for test=WT1 ref=WT2:\n",
-      "\n",
-      "\n",
-      "results for test=WT1 ref=WT2:\n",
-      "\n",
-      "\n",
-      "results for test=WT1 ref=WT2:\n",
-      "\n",
-      "\n",
-      "results for test=WT1 ref=WT2:\n",
-      "\n",
-      "hours pre = 459.3\n",
-      "hours pre = 459.3\n",
-      "hours pre = 459.3\n",
-      "hours pre = 459.3\n",
-      "hours post = 522.2\n",
-      "hours post = 522.2\n",
-      "hours post = 522.2\n",
-      "hours post = 522.2\n",
-      "\n",
-      "uplift estimate before adjustments = 2.6 %\n",
-      "\n",
-      "uplift estimate before adjustments = 2.6 %\n",
-      "\n",
-      "uplift estimate before adjustments = 2.6 %\n",
-      "\n",
-      "uplift estimate before adjustments = 2.6 %\n",
-      "\n",
-      "power only uplift estimate = 2.3 %\n",
-      "\n",
-      "power only uplift estimate = 2.3 %\n",
-      "\n",
-      "power only uplift estimate = 2.3 %\n",
-      "\n",
-      "power only uplift estimate = 2.3 %\n",
-      "reversed (power only) uplift estimate = 2.6 %\n",
-      "\n",
-      "reversed (power only) uplift estimate = 2.6 %\n",
-      "\n",
-      "reversed (power only) uplift estimate = 2.6 %\n",
-      "\n",
-      "reversed (power only) uplift estimate = 2.6 %\n",
-      "\n",
-      "Running block bootstrapping uncertainty analysis n_samples = 400\n",
-      "Running block bootstrapping uncertainty analysis n_samples = 400\n",
-      "Running block bootstrapping uncertainty analysis n_samples = 400\n",
-      "Running block bootstrapping uncertainty analysis n_samples = 400\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8b9f3c80d5e94e4e8c49d9c67f12325d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/400 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "block bootstrapping uncertainty analysis results (conf=90%):\n",
-      "  median = 2.7 %\n",
-      "  lower = 1.9 %\n",
-      "  upper = 3.8 %\n",
-      "  unc_one_sigma = 0.6 %\n",
-      "block bootstrapping uncertainty analysis results (conf=90%):\n",
-      "  median = 2.7 %\n",
-      "  lower = 1.9 %\n",
-      "  upper = 3.8 %\n",
-      "  unc_one_sigma = 0.6 %\n",
-      "block bootstrapping uncertainty analysis results (conf=90%):\n",
-      "  median = 2.7 %\n",
-      "  lower = 1.9 %\n",
-      "  upper = 3.8 %\n",
-      "  unc_one_sigma = 0.6 %\n",
-      "block bootstrapping uncertainty analysis results (conf=90%):\n",
-      "  median = 2.7 %\n",
-      "  lower = 1.9 %\n",
-      "  upper = 3.8 %\n",
-      "  unc_one_sigma = 0.6 %\n",
-      "\n",
-      "cat A 1 sigma unc = 0.3 %\n",
-      "\n",
-      "cat A 1 sigma unc = 0.3 %\n",
-      "\n",
-      "cat A 1 sigma unc = 0.3 %\n",
-      "\n",
-      "cat A 1 sigma unc = 0.3 %\n",
-      "abs reversal error / 2 = 0.1 %\n",
-      "abs reversal error / 2 = 0.1 %\n",
-      "abs reversal error / 2 = 0.1 %\n",
-      "abs reversal error / 2 = 0.1 %\n",
-      "bootstrap 1 sigma unc = 0.6 %\n",
-      "bootstrap 1 sigma unc = 0.6 %\n",
-      "bootstrap 1 sigma unc = 0.6 %\n",
-      "bootstrap 1 sigma unc = 0.6 %\n",
-      "missing bins scale factor = 1.000\n",
-      "missing bins scale factor = 1.000\n",
-      "missing bins scale factor = 1.000\n",
-      "missing bins scale factor = 1.000\n",
-      "final 1 sigma unc = 0.6 %\n",
-      "\n",
-      "final 1 sigma unc = 0.6 %\n",
-      "\n",
-      "final 1 sigma unc = 0.6 %\n",
-      "\n",
-      "final 1 sigma unc = 0.6 %\n",
-      "\n",
-      "final uplift estimate = 2.7 %\n",
-      "final uplift estimate = 2.7 %\n",
-      "final uplift estimate = 2.7 %\n",
-      "final uplift estimate = 2.7 %\n",
-      "final P95 uplift estimate = 1.8 %\n",
-      "final P95 uplift estimate = 1.8 %\n",
-      "final P95 uplift estimate = 1.8 %\n",
-      "final P95 uplift estimate = 1.8 %\n",
-      "final P5 uplift estimate = 3.7 %\n",
-      "final P5 uplift estimate = 3.7 %\n",
-      "final P5 uplift estimate = 3.7 %\n",
-      "final P5 uplift estimate = 3.7 %\n",
-      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
-      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
-      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
-      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
-      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
-      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
-      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
-      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
-      "finished analysing WT1 WT2\n",
-      "\n",
-      "finished analysing WT1 WT2\n",
-      "\n",
-      "finished analysing WT1 WT2\n",
-      "\n",
-      "finished analysing WT1 WT2\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "0c7901b1-d3dd-4571-b2b7-3849494d058b",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>wind_up_version</th>\n",
-       "      <th>time_calculated</th>\n",
-       "      <th>preprocess_warning_counts</th>\n",
-       "      <th>test_warning_counts</th>\n",
-       "      <th>test_ref_warning_counts</th>\n",
-       "      <th>test_wtg</th>\n",
-       "      <th>test_pw_col</th>\n",
-       "      <th>ref</th>\n",
-       "      <th>ref_ws_col</th>\n",
-       "      <th>uplift_frc</th>\n",
-       "      <th>...</th>\n",
-       "      <th>poweronly_uplift_frc</th>\n",
-       "      <th>reversed_uplift_frc</th>\n",
-       "      <th>reversal_error</th>\n",
-       "      <th>lt_wtg_hours_raw</th>\n",
-       "      <th>lt_wtg_hours_filt</th>\n",
-       "      <th>test_max_ws_drift</th>\n",
-       "      <th>test_max_ws_drift_pp_period</th>\n",
-       "      <th>test_powercurve_shift</th>\n",
-       "      <th>test_rpm_shift</th>\n",
-       "      <th>test_pitch_shift</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.1.9</td>\n",
-       "      <td>2024-09-10 14:56:31.754381+00:00</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>WT1</td>\n",
-       "      <td>test_pw_clipped</td>\n",
-       "      <td>WT2</td>\n",
-       "      <td>ref_ws_est_blend</td>\n",
-       "      <td>0.027311</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.023437</td>\n",
-       "      <td>0.02571</td>\n",
-       "      <td>0.002274</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>0.071621</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>1 rows × 49 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "  wind_up_version                  time_calculated  preprocess_warning_counts  \\\n",
-       "0           0.1.9 2024-09-10 14:56:31.754381+00:00                          0   \n",
-       "\n",
-       "   test_warning_counts  test_ref_warning_counts test_wtg      test_pw_col  \\\n",
-       "0                    2                        2      WT1  test_pw_clipped   \n",
-       "\n",
-       "   ref        ref_ws_col  uplift_frc  ...  poweronly_uplift_frc  \\\n",
-       "0  WT2  ref_ws_est_blend    0.027311  ...              0.023437   \n",
-       "\n",
-       "   reversed_uplift_frc  reversal_error  lt_wtg_hours_raw  lt_wtg_hours_filt  \\\n",
-       "0              0.02571        0.002274                 0                  0   \n",
-       "\n",
-       "   test_max_ws_drift  test_max_ws_drift_pp_period  test_powercurve_shift  \\\n",
-       "0                NaN                          NaN               0.071621   \n",
-       "\n",
-       "   test_rpm_shift  test_pitch_shift  \n",
-       "0             0.0               0.0  \n",
-       "\n",
-       "[1 rows x 49 columns]"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "results_per_test_ref_df"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/examples/wedowind_example.py b/examples/wedowind_example.py
index 16f69f5..ba4ea16 100644
--- a/examples/wedowind_example.py
+++ b/examples/wedowind_example.py
@@ -1,15 +1,18 @@
 # example based on https://relight.cloud/doc/turbine-upgrade-dataset-9zw1vl/turbineperformance
 
+import datetime as dt
 import logging
 import math
 import sys
 import zipfile
 from enum import Enum
 from pathlib import Path
+from typing import NamedTuple
 
 import numpy as np
 import pandas as pd
 from matplotlib import pyplot as plt
+from pydantic import BaseModel, Field
 
 from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns
 from wind_up.interface import AssessmentInputs
@@ -21,7 +24,7 @@
 sys.path.append(str(PROJECTROOT_DIR))
 from examples.helpers import download_zenodo_data, setup_logger
 
-CACHE_DIR = PROJECTROOT_DIR / "cache" / "wedowind_example_data"
+CACHE_DIR = PROJECTROOT_DIR / "cache"
 ASSESSMENT_NAME = "wedowind_example"
 ANALYSIS_OUTPUT_DIR = OUTPUT_DIR / ASSESSMENT_NAME
 ANALYSIS_OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
@@ -36,23 +39,34 @@
 class WeDoWindScadaColumns(Enum):
     Y_CTRL_NORM = "y_ctrl(normalized)"
     Y_TEST_NORM = "y_test(normalized)"
+    UPGRADE_STATUS = "upgradestatus"
+    WIND_SPEED = "V"
+    WIND_DIRECTION = "D"
 
 
-class TurbineNames(Enum):
+class WeDoWindTurbineNames(Enum):
     REF = "Ref"
     TEST = "Test"
 
 
-class MetadataColumns(Enum):
-    NAME = "Name"
-    LATITUDE = "Latitude"
-    LONGITUDE = "Longitude"
+class KeyDates(NamedTuple):
+    analysis_first_dt_utc_start: dt.datetime
+    upgrade_first_dt_utc_start: dt.datetime
+    analysis_last_dt_utc_start: dt.datetime
+    lt_first_dt_utc_start: dt.datetime
+    lt_last_dt_utc_start: dt.datetime
+    detrend_first_dt_utc_start: dt.datetime
+    detrend_last_dt_utc_start: dt.datetime
+    pre_first_dt_utc_start: dt.datetime
+    pre_last_dt_utc_start: dt.datetime
+    post_first_dt_utc_start: dt.datetime
+    post_last_dt_utc_start: dt.datetime
 
 
-class WDWScadaUnpacker:
-    def __init__(self, scada_file_name: str, wdw_zip_file_path: Path = CACHE_DIR / ZIP_FILENAME) -> None:
+class WeDoWindScadaUnpacker:
+    def __init__(self, scada_file_name: str, wedowind_zip_file_path: Path = CACHE_DIR / ZIP_FILENAME) -> None:
         self.scada_file_name = scada_file_name
-        self.wdw_zip_file_path = wdw_zip_file_path
+        self.wedowind_zip_file_path = wedowind_zip_file_path
         self.scada_df = None
 
     def unpack(self, rated_power_kw: float) -> pd.DataFrame:
@@ -66,17 +80,19 @@ def unpack(self, rated_power_kw: float) -> pd.DataFrame:
         return self.scada_df
 
     def _read_raw_df(self) -> pd.DataFrame:
-        with zipfile.ZipFile(self.wdw_zip_file_path) as zf:
-            return pd.read_csv(zf.open(self.scada_file_name), parse_dates=[1], index_col=0).drop(
+        with zipfile.ZipFile(self.wedowind_zip_file_path) as zf:
+            raw_df = pd.read_csv(zf.open(self.scada_file_name), parse_dates=[1], index_col=0).drop(
                 columns=["VcosD", "VsinD"]
             )
+        raw_df.columns = raw_df.columns.str.replace(" ", "")
+        return raw_df
 
     @staticmethod
     def _format_scada_df(scada_df: pd.DataFrame, rated_power_kw: float) -> pd.DataFrame:
         scada_df[DataColumns.active_power_mean] = scada_df["normalized_power"] * rated_power_kw
         # map some mast data to the turbine for convenience
-        scada_df[DataColumns.wind_speed_mean] = scada_df["V"]
-        scada_df[DataColumns.yaw_angle_mean] = scada_df["D"]
+        scada_df[DataColumns.wind_speed_mean] = scada_df[WeDoWindScadaColumns.WIND_SPEED.value]
+        scada_df[DataColumns.yaw_angle_mean] = scada_df[WeDoWindScadaColumns.WIND_DIRECTION.value]
         # placeholder values for other required columns
         scada_df[DataColumns.pitch_angle_mean] = 0
         scada_df[DataColumns.gen_rpm_mean] = 1000
@@ -93,7 +109,7 @@ def _construct_scada_df_test(scada_df_raw: pd.DataFrame) -> pd.DataFrame:
         return (
             scada_df_raw.drop(columns=[WeDoWindScadaColumns.Y_CTRL_NORM.value])
             .copy()
-            .assign(TurbineName=TurbineNames.TEST.value)
+            .assign(TurbineName=WeDoWindTurbineNames.TEST.value)
             .rename(columns={WeDoWindScadaColumns.Y_TEST_NORM.value: "normalized_power"})
         )
 
@@ -102,15 +118,27 @@ def _construct_scada_df_ref(scada_df_raw: pd.DataFrame) -> pd.DataFrame:
         return (
             scada_df_raw.drop(columns=[WeDoWindScadaColumns.Y_TEST_NORM.value])
             .copy()
-            .assign(TurbineName=TurbineNames.REF.value)
+            .assign(TurbineName=WeDoWindTurbineNames.REF.value)
             .rename(columns={WeDoWindScadaColumns.Y_CTRL_NORM.value: "normalized_power"})
         )
 
 
-def make_wdw_metadata_df() -> pd.DataFrame:
+class WeDoWindAnalysisConf(BaseModel):
+    scada_file_name: str = Field(description="e.g. 'Turbine Upgrade Dataset(Pitch Angle Pair).csv'")
+    wd_ranges_polluted_with_mast_shadow: list[tuple[int, int]] = Field(description="e.g. [(250, 315)]")
+    clip_rated_power_pp: bool
+
+
+def download_wedowind_data_from_zenodo() -> None:
+    logger.info("Downloading example data from Zenodo")
+    # https://zenodo.org/records/5516556
+    download_zenodo_data(record_id="5516556", output_dir=CACHE_DIR, filenames={ZIP_FILENAME})
+
+
+def create_fake_wedowind_metadata_df() -> pd.DataFrame:
     coords_df = pd.DataFrame(
         {
-            MetadataColumns.NAME.value: ["WT1", "WT2", "WT3", "WT4", "MAST1", "MAST2"],
+            "Name": ["WT1", "WT2", "WT3", "WT4", "MAST1", "MAST2"],
             "X": [500, 2200, 9836, 7571, 0, 9571],
             "Y": [9136, 9436, 0, 2050, 9836, 50],
         }
@@ -118,21 +146,94 @@ def make_wdw_metadata_df() -> pd.DataFrame:
     assumed_wf_lat = 40
     assumed_wf_lon = -89
     m_per_deglat = 40_075_000 / 360
-    coords_df[MetadataColumns.LATITUDE.value] = assumed_wf_lat + (coords_df["Y"] - coords_df["Y"].mean()) / m_per_deglat
-    coords_df[MetadataColumns.LONGITUDE.value] = assumed_wf_lon + (coords_df["X"] - coords_df["X"].mean()) / (
+    coords_df["Latitude"] = assumed_wf_lat + (coords_df["Y"] - coords_df["Y"].mean()) / m_per_deglat
+    coords_df["Longitude"] = assumed_wf_lon + (coords_df["X"] - coords_df["X"].mean()) / (
         m_per_deglat * math.cos(assumed_wf_lat * math.pi / 180)
     )
-    return coords_df.loc[
-        :, [MetadataColumns.NAME.value, MetadataColumns.LATITUDE.value, MetadataColumns.LONGITUDE.value]
-    ].assign(TimeZone="UTC", TimeSpanMinutes=10, TimeFormat="Start")
+    return coords_df.loc[:, ["Name", "Latitude", "Longitude"]].assign(
+        TimeZone="UTC", TimeSpanMinutes=10, TimeFormat="Start"
+    )
+
+
+def create_fake_wedowind_reanalysis_dataset(scada_df_index_minimum: dt.datetime) -> ReanalysisDataset:
+    rng = np.random.default_rng(0)
+    rows = 100
+    return ReanalysisDataset(
+        id="dummy_reanalysis_data",
+        data=pd.DataFrame(
+            data={
+                "100_m_hws_mean_mps": rng.uniform(5, 10, rows),
+                "100_m_hwd_mean_deg-n_true": rng.uniform(0, 360, rows),
+            },
+            index=pd.DatetimeIndex(pd.date_range(start=scada_df_index_minimum, periods=rows, freq="h", tz="UTC")),
+        ),
+    )
 
 
-def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, rotor_diameter_m: int) -> Path:
+def establish_wedowind_key_dates(scada_df: pd.DataFrame) -> KeyDates:
     """
-    It is unclear how the scada data is related to the metadata so look for wakes in the data
+    Extracts important dates from the SCADA data. These dates may then be used in the WindUpConfig.
+
+    Args:
+        scada_df: SCADA data with a datetime-like index and an "upgradestatus" column (> 0 marks post-upgrade rows)
 
-    Returns: None (but displays plots)
+    Returns: tuple of dates that may be passed to the WindUpConfig
 
+    """
+    analysis_first_dt_utc_start = scada_df.index.min()
+    upgrade_first_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min()
+    analysis_last_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max()
+    lt_first_dt_utc_start = scada_df.index.min()
+    lt_last_dt_utc_start = (
+        scada_df.index.min()
+        + (
+            scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max()
+            - scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min()
+        )
+        - pd.Timedelta(minutes=10)
+    )
+    detrend_first_dt_utc_start = scada_df.index.min()
+    detrend_last_dt_utc_start = (
+        scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min()
+        - pd.DateOffset(weeks=1)
+        - pd.Timedelta(minutes=10)
+    )
+    pre_first_dt_utc_start = scada_df.index.min()
+    pre_last_dt_utc_start = (
+        scada_df.index.min()
+        + (
+            scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max()
+            - scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min()
+        )
+        - pd.Timedelta(minutes=10)
+    )
+    post_first_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.min()
+    post_last_dt_utc_start = scada_df[scada_df[WeDoWindScadaColumns.UPGRADE_STATUS.value] > 0].index.max()
+
+    return KeyDates(
+        analysis_first_dt_utc_start=analysis_first_dt_utc_start,
+        upgrade_first_dt_utc_start=upgrade_first_dt_utc_start,
+        analysis_last_dt_utc_start=analysis_last_dt_utc_start,
+        lt_first_dt_utc_start=lt_first_dt_utc_start,
+        lt_last_dt_utc_start=lt_last_dt_utc_start,
+        detrend_first_dt_utc_start=detrend_first_dt_utc_start,
+        detrend_last_dt_utc_start=detrend_last_dt_utc_start,
+        pre_first_dt_utc_start=pre_first_dt_utc_start,
+        pre_last_dt_utc_start=pre_last_dt_utc_start,
+        post_first_dt_utc_start=post_first_dt_utc_start,
+        post_last_dt_utc_start=post_last_dt_utc_start,
+    )
+
+
+def generate_custom_exploratory_plots(
+    scada_df: pd.DataFrame, assumed_rated_power_kw: float, rotor_diameter_m: int
+) -> Path:
+    """
+    These custom plots are to help with SCADA data exploration.
+    They were created because it was unclear how the SCADA data relates to the metadata, and they help when looking
+    for wakes in the data.
+
+    Returns: path to directory containing the plots
     """
     custom_plots_dir_root = ANALYSIS_OUTPUT_DIR / "custom_plots"
     custom_plots_dir_timeseries = custom_plots_dir_root / "timeseries"
@@ -140,7 +241,7 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto
     custom_plots_dir_root.mkdir(exist_ok=True, parents=True)
     custom_plots_dir_timeseries.mkdir(exist_ok=True)
 
-    for name, df in scada_df.groupby("TurbineName"):
+    for name, df in scada_df.groupby(DataColumns.turbine_name):
         for col in df.columns:
             plt.figure()
             plt.scatter(df.index, df[col], s=1)
@@ -156,28 +257,32 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto
     region2_df = scada_df[(scada_df["normalized_power"] > 0.2) & (scada_df["normalized_power"] < 0.8)]  # noqa PLR2004
 
     binned_by_turbine = {}
-    for name, df in region2_df.groupby("TurbineName"):
+    for name, df in region2_df.groupby(DataColumns.turbine_name):
         if name == "Mast":
             continue
         # find mean normalized_power and V binned by D
         _df = df.copy()
-        _df["D_bin"] = pd.cut(_df["D"], bins=range(0, 361, 5))
-        binned = _df.groupby("D_bin", observed=False)[["D", "normalized_power", "V"]].mean()
+        _df[f"{WeDoWindScadaColumns.WIND_DIRECTION.value}_bin"] = pd.cut(
+            _df[WeDoWindScadaColumns.WIND_DIRECTION.value], bins=range(0, 361, 5)
+        )
+        binned = _df.groupby(f"{WeDoWindScadaColumns.WIND_DIRECTION.value}_bin", observed=False)[
+            [WeDoWindScadaColumns.WIND_DIRECTION.value, "normalized_power", "V"]
+        ].mean()
         binned_by_turbine[name] = binned
         plt.figure()
         plt.plot(
-            binned["D"],
+            binned[WeDoWindScadaColumns.WIND_DIRECTION.value],
             calc_cp(
                 power_kw=binned["normalized_power"] * assumed_rated_power_kw,
-                ws_ms=binned["V"],
+                ws_ms=binned[WeDoWindScadaColumns.WIND_SPEED.value],
                 air_density_kgpm3=1.2,
                 rotor_diameter_m=rotor_diameter_m,
             ),
             marker=".",
         )
-        title = f"{name} Cp vs D"
+        title = f"{name} Cp vs {WeDoWindScadaColumns.WIND_DIRECTION.value}"
         plt.title(title)
-        plt.xlabel("D")
+        plt.xlabel(WeDoWindScadaColumns.WIND_DIRECTION.value)
         plt.ylabel("Cp")
         plt.xticks(rotation=90)
         plt.grid()
@@ -188,10 +293,10 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto
     plt.figure()
     for name, binned in binned_by_turbine.items():
         plt.plot(
-            binned["D"],
+            binned[WeDoWindScadaColumns.WIND_DIRECTION.value],
             calc_cp(
                 power_kw=binned["normalized_power"] * assumed_rated_power_kw,
-                ws_ms=binned["V"],
+                ws_ms=binned[WeDoWindScadaColumns.WIND_SPEED.value],
                 air_density_kgpm3=1.2,
                 rotor_diameter_m=rotor_diameter_m,
             ),
@@ -199,9 +304,9 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto
             marker=".",
         )
     plt.ylim(0.2, 0.7)
-    title = "Cp vs D"
+    title = f"Cp vs {WeDoWindScadaColumns.WIND_DIRECTION.value}"
     plt.title(title)
-    plt.xlabel("D")
+    plt.xlabel(WeDoWindScadaColumns.WIND_DIRECTION.value)
     plt.ylabel("Cp")
     plt.xticks(rotation=90)
     plt.grid()
@@ -213,49 +318,59 @@ def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, roto
     return custom_plots_dir_root
 
 
-def download_wdw_data_from_zenodo() -> None:
-    logger.info("Downloading example data from Zenodo")
-    download_zenodo_data(record_id="5516556", output_dir=CACHE_DIR, filenames={ZIP_FILENAME})
-    download_zenodo_data(
-        record_id="5516552", output_dir=CACHE_DIR, filenames={"Inland_Offshore_Wind_Farm_Dataset1.zip"}
-    )
-
-
 def main() -> None:
-    download_wdw_data_from_zenodo()
+    generate_custom_plots = False  # whether to create exploratory scada data plots
+    analysis_name = "Pitch Angle"  # "Pitch Angle" or "Vortex Generator"
+
+    download_wedowind_data_from_zenodo()
 
     assumed_rated_power_kw = 1500
     rotor_diameter_m = 80
     cutout_ws_mps = 20
-    scada_file_name = "Turbine Upgrade Dataset(Pitch Angle Pair).csv"  # or Turbine Upgrade Dataset(VG Pair).csv
 
-    logger.info("Preprocessing turbine SCADA data")
-    scada_df = WDWScadaUnpacker(scada_file_name=scada_file_name).unpack(rated_power_kw=assumed_rated_power_kw)
-    metadata_df = make_wdw_metadata_df()
+    analysis_specific_config = {
+        "Pitch Angle": WeDoWindAnalysisConf(
+            scada_file_name="Turbine Upgrade Dataset(Pitch Angle Pair).csv",
+            wd_ranges_polluted_with_mast_shadow=[(70, 150)],  # determined by inspecting the custom plots
+            clip_rated_power_pp=False,
+        ),
+        "Vortex Generator": WeDoWindAnalysisConf(
+            scada_file_name="Turbine Upgrade Dataset(VG Pair).csv",
+            wd_ranges_polluted_with_mast_shadow=[(30, 115), (250, 315)],  # determined by inspecting the custom plots
+            clip_rated_power_pp=True,  # Vortex Generators are not expected to increase rated power
+        ),
+    }
+    if analysis_name not in analysis_specific_config:
+        msg = f"analysis_name must be one of {list(analysis_specific_config.keys())}"
+        raise ValueError(msg)
 
-    run_custom_plots(
-        scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m
+    analysis_conf = analysis_specific_config[analysis_name]
+
+    logger.info("Preprocessing turbine SCADA data")
+    scada_df = WeDoWindScadaUnpacker(scada_file_name=analysis_conf.scada_file_name).unpack(
+        rated_power_kw=assumed_rated_power_kw
     )
+    metadata_df = create_fake_wedowind_metadata_df()
+
+    if generate_custom_plots:
+        generate_custom_exploratory_plots(
+            scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m
+        )
 
     # based on the above I think the objects are MAST1, test=WT1 and ref=WT2
     scada_df = scada_df.replace(
-        {"TurbineName": {TurbineNames.TEST.value: "WT1", TurbineNames.REF.value: "WT2", "Mast": "MAST1"}}
+        {
+            DataColumns.turbine_name: {
+                WeDoWindTurbineNames.TEST.value: "WT1",
+                WeDoWindTurbineNames.REF.value: "WT2",
+                "Mast": "MAST1",
+            }
+        }
     )
     # drop everything except the turbines from the metadata
     metadata_df = metadata_df[metadata_df["Name"].isin(["WT1", "WT2"])]
-
-    rng = np.random.default_rng(0)
-    rows = 100
-    reanalysis_dataset = ReanalysisDataset(
-        id="dummy_reanalysis_data",
-        data=pd.DataFrame(
-            data={
-                "100_m_hws_mean_mps": rng.uniform(5, 10, rows),
-                "100_m_hwd_mean_deg-n_true": rng.uniform(0, 360, rows),
-            },
-            index=pd.DatetimeIndex(pd.date_range(start=scada_df.index.min(), periods=rows, freq="h", tz="UTC")),
-        ),
-    )
+    # Reanalysis data is required by WindUp, therefore create a fake reanalysis object
+    reanalysis_dataset = create_fake_wedowind_reanalysis_dataset(scada_df_index_minimum=scada_df.index.min())
 
     # Construct wind-up Configurations
 
@@ -274,56 +389,59 @@ def main() -> None:
         for x in ["WT1", "WT2"]
     }
 
+    key_dates = establish_wedowind_key_dates(scada_df=scada_df)
+
     cfg = WindUpConfig(
-        assessment_name=ASSESSMENT_NAME,
-        ref_wd_filter=[150, 240],  # apparent wake free sector
+        assessment_name=analysis_name,
+        ref_wd_filter=[150, 240],  # apparent wake free sector (determined by inspecting the custom plots)
         use_lt_distribution=False,
-        out_dir=OUTPUT_DIR / ASSESSMENT_NAME,
+        out_dir=OUTPUT_DIR / analysis_name,
         test_wtgs=[wtg_map[x] for x in ["WT1"]],
         ref_wtgs=[wtg_map[x] for x in ["WT2"]],
-        analysis_first_dt_utc_start=scada_df.index.min(),
-        upgrade_first_dt_utc_start=scada_df[scada_df["upgrade status"] > 0].index.min(),
-        analysis_last_dt_utc_start=scada_df[scada_df["upgrade status"] > 0].index.max(),
         years_offset_for_pre_period=1,
-        lt_first_dt_utc_start=scada_df.index.min(),
-        lt_last_dt_utc_start=scada_df.index.min()
-        + (scada_df[scada_df["upgrade status"] > 0].index.max() - scada_df[scada_df["upgrade status"] > 0].index.min())
-        - pd.Timedelta(minutes=10),
-        detrend_first_dt_utc_start=scada_df.index.min(),
-        detrend_last_dt_utc_start=scada_df[scada_df["upgrade status"] > 0].index.min()
-        - pd.DateOffset(weeks=1)
-        - pd.Timedelta(minutes=10),
         years_for_lt_distribution=1,
         years_for_detrend=1,
         ws_bin_width=1.0,
-        asset={
-            "name": "Mystery Wind Farm",
-            "wtgs": list(wtg_map.values()),
-        },
-        missing_scada_data_fields=["YawAngleMin", "YawAngleMax"],
+        analysis_first_dt_utc_start=key_dates.analysis_first_dt_utc_start,
+        upgrade_first_dt_utc_start=key_dates.upgrade_first_dt_utc_start,
+        analysis_last_dt_utc_start=key_dates.analysis_last_dt_utc_start,
+        lt_first_dt_utc_start=key_dates.lt_first_dt_utc_start,
+        lt_last_dt_utc_start=key_dates.lt_last_dt_utc_start,
+        detrend_first_dt_utc_start=key_dates.detrend_first_dt_utc_start,
+        detrend_last_dt_utc_start=key_dates.detrend_last_dt_utc_start,
+        asset={"name": "Mystery Wind Farm", "wtgs": list(wtg_map.values())},
+        missing_scada_data_fields=[DataColumns.yaw_angle_min, DataColumns.yaw_angle_max],
         prepost={
-            "pre_first_dt_utc_start": scada_df.index.min(),
-            "pre_last_dt_utc_start": scada_df.index.min()
-            + (
-                scada_df[scada_df["upgrade status"] > 0].index.max()
-                - scada_df[scada_df["upgrade status"] > 0].index.min()
-            )
-            - pd.Timedelta(minutes=10),
-            "post_first_dt_utc_start": scada_df[scada_df["upgrade status"] > 0].index.min(),
-            "post_last_dt_utc_start": scada_df[scada_df["upgrade status"] > 0].index.max(),
+            "pre_first_dt_utc_start": key_dates.pre_first_dt_utc_start,
+            "pre_last_dt_utc_start": key_dates.pre_last_dt_utc_start,
+            "post_first_dt_utc_start": key_dates.post_first_dt_utc_start,
+            "post_last_dt_utc_start": key_dates.post_last_dt_utc_start,
         },
         optimize_northing_corrections=False,
+        clip_rated_power_pp=analysis_conf.clip_rated_power_pp,
     )
 
     plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots")
 
+    wd_ranges_to_exclude = analysis_conf.wd_ranges_polluted_with_mast_shadow
+    scada_df_for_assessment = scada_df.copy()
+    for wdr in wd_ranges_to_exclude:
+        logger.info("Filtering out wind directions between %s", wdr)
+        mask = (scada_df_for_assessment[DataColumns.yaw_angle_mean] >= wdr[0]) & (
+            scada_df_for_assessment[DataColumns.yaw_angle_mean] <= wdr[1]
+        )
+        scada_df_for_assessment = scada_df_for_assessment.loc[~mask, :]
+
+    cache_assessment = CACHE_DIR / analysis_name
+    cache_assessment.mkdir(parents=True, exist_ok=True)
+
     assessment_inputs = AssessmentInputs.from_cfg(
         cfg=cfg,
         plot_cfg=plot_cfg,
-        scada_df=scada_df[(scada_df["D"] < 70) | (scada_df["D"] > 150)],  # noqa PLR2004 filter out apparent mast waked sector
+        scada_df=scada_df_for_assessment,  # apparent mast-waked sectors were filtered out above
         metadata_df=metadata_df,
         reanalysis_datasets=[reanalysis_dataset],
-        cache_dir=CACHE_DIR,
+        cache_dir=cache_assessment,
     )
 
     # Run Analysis
diff --git a/wind_up/models.py b/wind_up/models.py
index 54eefd5..4f3af82 100644
--- a/wind_up/models.py
+++ b/wind_up/models.py
@@ -269,6 +269,7 @@ class WindUpConfig(BaseModel):
     )
     toggle: Toggle | None = None
     prepost: PrePost | None = None
+    clip_rated_power_pp: bool = Field(default=True, description="Clip rated power in Pre-Post analysis")
 
     @model_validator(mode="after")
     def check_years_offset_for_pre_period(self: WindUpConfig) -> WindUpConfig:
diff --git a/wind_up/plots/scada_funcs_plots.py b/wind_up/plots/scada_funcs_plots.py
index 4ca01ff..8f79750 100644
--- a/wind_up/plots/scada_funcs_plots.py
+++ b/wind_up/plots/scada_funcs_plots.py
@@ -20,12 +20,12 @@
 
 def plot_data_coverage_heatmap(df: pd.DataFrame, plot_title: str, plot_cfg: PlotConfig) -> None:
     # calculate data coverage per turbine
-    covdf = df.groupby("TurbineName", observed=False).agg(
-        power=pd.NamedAgg(column="ActivePowerMean", aggfunc=lambda x: x.count() / x.size),
-        windspeed=pd.NamedAgg(column="WindSpeedMean", aggfunc=lambda x: x.count() / x.size),
-        yaw=pd.NamedAgg(column="YawAngleMean", aggfunc=lambda x: x.count() / x.size),
-        rpm=pd.NamedAgg(column="GenRpmMean", aggfunc=lambda x: x.count() / x.size),
-        pitch=pd.NamedAgg(column="PitchAngleMean", aggfunc=lambda x: x.count() / x.size),
+    covdf = df.groupby(DataColumns.turbine_name, observed=False).agg(
+        power=pd.NamedAgg(column=DataColumns.active_power_mean, aggfunc=lambda x: x.count() / x.size),
+        windspeed=pd.NamedAgg(column=DataColumns.wind_speed_mean, aggfunc=lambda x: x.count() / x.size),
+        yaw=pd.NamedAgg(column=DataColumns.yaw_angle_mean, aggfunc=lambda x: x.count() / x.size),
+        rpm=pd.NamedAgg(column=DataColumns.gen_rpm_mean, aggfunc=lambda x: x.count() / x.size),
+        pitch=pd.NamedAgg(column=DataColumns.pitch_angle_mean, aggfunc=lambda x: x.count() / x.size),
     )
 
     plt.figure()
@@ -40,9 +40,9 @@ def plot_data_coverage_heatmap(df: pd.DataFrame, plot_title: str, plot_cfg: Plot
 
 def calc_cf_by_turbine(scada_df: pd.DataFrame, cfg: WindUpConfig) -> pd.DataFrame:
     rows_per_hour = 3600 / cfg.timebase_s
-    cf_df = scada_df.groupby("TurbineName", observed=False).agg(
-        hours=pd.NamedAgg(column="TurbineName", aggfunc=lambda x: x.count() / rows_per_hour),
-        MWh=pd.NamedAgg(column="ActivePowerMean", aggfunc=lambda x: x.sum() / rows_per_hour / 1000),
+    cf_df = scada_df.groupby(DataColumns.turbine_name, observed=False).agg(
+        hours=pd.NamedAgg(column=DataColumns.turbine_name, aggfunc=lambda x: x.count() / rows_per_hour),
+        MWh=pd.NamedAgg(column=DataColumns.active_power_mean, aggfunc=lambda x: x.sum() / rows_per_hour / 1000),
     )
     for i, rp in strict_zip(
         [x.name for x in cfg.asset.wtgs],
@@ -105,11 +105,11 @@ def plot_ops_curves_per_ttype(cfg: WindUpConfig, df: pd.DataFrame, title_end: st
 
 def plot_ops_curves_one_ttype_or_wtg(df: pd.DataFrame, ttype_or_wtg: str, title_end: str, plot_cfg: PlotConfig) -> None:
     plt.figure()
-    plt.scatter(df["WindSpeedMean"], df["ActivePowerMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.active_power_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
     plot_title = f"{ttype_or_wtg} power curve {title_end}"
     plt.title(plot_title)
-    plt.xlabel("WindSpeedMean [m/s]")
-    plt.ylabel("ActivePowerMean [kW]")
+    plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]")
+    plt.ylabel(f"{DataColumns.active_power_mean} [kW]")
     plt.grid()
     if plot_cfg.show_plots:
         plt.show()
@@ -122,27 +122,27 @@ def plot_ops_curves_one_ttype_or_wtg(df: pd.DataFrame, ttype_or_wtg: str, title_
     # plot rpm and pitch vs power and wind speed in a 2 by 2 grid
     plt.figure(figsize=(12, 8))
     plt.subplot(2, 2, 1)
-    plt.scatter(df["ActivePowerMean"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
-    plt.xlabel("ActivePowerMean [kW]")
-    plt.ylabel("GenRpmMean [RPM]")
+    plt.scatter(df[DataColumns.active_power_mean], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.xlabel(f"{DataColumns.active_power_mean} [kW]")
+    plt.ylabel(f"{DataColumns.gen_rpm_mean} [RPM]")
     plt.grid()
 
     plt.subplot(2, 2, 2)
-    plt.scatter(df["WindSpeedMean"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
-    plt.xlabel("WindSpeedMean [m/s]")
-    plt.ylabel("GenRpmMean [RPM]")
+    plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]")
+    plt.ylabel(f"{DataColumns.gen_rpm_mean} [RPM]")
     plt.grid()
 
     plt.subplot(2, 2, 3)
-    plt.scatter(df["ActivePowerMean"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
-    plt.xlabel("ActivePowerMean [kW]")
-    plt.ylabel("PitchAngleMean [deg]")
+    plt.scatter(df[DataColumns.active_power_mean], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.xlabel(f"{DataColumns.active_power_mean} [kW]")
+    plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]")
     plt.grid()
 
     plt.subplot(2, 2, 4)
-    plt.scatter(df["WindSpeedMean"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
-    plt.xlabel("WindSpeedMean [m/s]")
-    plt.ylabel("PitchAngleMean [deg]")
+    plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]")
+    plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]")
     plt.grid()
 
     plot_title = f"{ttype_or_wtg} ops curves, {title_end}"
@@ -362,39 +362,39 @@ def plot_filter_rpm_and_pt_curve_one_ttype_or_wtg(
 ) -> None:
     plt.figure(figsize=(12, 8))
     plt.subplot(2, 2, 1)
-    plt.scatter(df["pw_clipped"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.scatter(df["pw_clipped"], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
     x = [rpm_v_pw_curve.index[0].left] + [x.mid for x in rpm_v_pw_curve.index] + [rpm_v_pw_curve.index[-1].right]
     y = [rpm_v_pw_curve["y_limit"].iloc[0], *list(rpm_v_pw_curve["y_limit"]), rpm_v_pw_curve["y_limit"].iloc[-1]]
     plt.plot(x, y, color="red")
     plt.xlabel("pw_clipped [kW]")
-    plt.ylabel("GenRpmMean [deg]")
+    plt.ylabel(f"{DataColumns.gen_rpm_mean} [deg]")
     plt.grid()
 
     plt.subplot(2, 2, 2)
-    plt.scatter(df["WindSpeedMean"], df["GenRpmMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.gen_rpm_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
     x = [rpm_v_ws_curve.index[0].left] + [x.mid for x in rpm_v_ws_curve.index] + [rpm_v_ws_curve.index[-1].right]
     y = [rpm_v_ws_curve["y_limit"].iloc[0], *list(rpm_v_ws_curve["y_limit"]), rpm_v_ws_curve["y_limit"].iloc[-1]]
     plt.plot(x, y, color="red")
-    plt.xlabel("WindSpeedMean [m/s]")
-    plt.ylabel("GenRpmMean [deg]")
+    plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]")
+    plt.ylabel(f"{DataColumns.gen_rpm_mean} [deg]")
     plt.grid()
 
     plt.subplot(2, 2, 3)
-    plt.scatter(df["pw_clipped"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.scatter(df["pw_clipped"], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
     x = [pt_v_pw_curve.index[0].left] + [x.mid for x in pt_v_pw_curve.index] + [pt_v_pw_curve.index[-1].right]
     y = [pt_v_pw_curve["y_limit"].iloc[0], *list(pt_v_pw_curve["y_limit"]), pt_v_pw_curve["y_limit"].iloc[-1]]
     plt.plot(x, y, color="red")
     plt.xlabel("pw_clipped [kW]")
-    plt.ylabel("PitchAngleMean [deg]")
+    plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]")
     plt.grid()
 
     plt.subplot(2, 2, 4)
-    plt.scatter(df["WindSpeedMean"], df["PitchAngleMean"], s=SCATTER_S, alpha=SCATTER_ALPHA)
+    plt.scatter(df[DataColumns.wind_speed_mean], df[DataColumns.pitch_angle_mean], s=SCATTER_S, alpha=SCATTER_ALPHA)
     x = [pt_v_ws_curve.index[0].left] + [x.mid for x in pt_v_ws_curve.index] + [pt_v_ws_curve.index[-1].right]
     y = [pt_v_ws_curve["y_limit"].iloc[0], *list(pt_v_ws_curve["y_limit"]), pt_v_ws_curve["y_limit"].iloc[-1]]
     plt.plot(x, y, color="red")
-    plt.xlabel("WindSpeedMean [m/s]")
-    plt.ylabel("PitchAngleMean [deg]")
+    plt.xlabel(f"{DataColumns.wind_speed_mean} [m/s]")
+    plt.ylabel(f"{DataColumns.pitch_angle_mean} [deg]")
     plt.grid()
 
     plot_title = f"{ttype_or_wtg} rpm and pitch curve filters"
diff --git a/wind_up/pp_analysis.py b/wind_up/pp_analysis.py
index de2f109..28f439f 100644
--- a/wind_up/pp_analysis.py
+++ b/wind_up/pp_analysis.py
@@ -58,7 +58,7 @@ def pp_raw_df(
 
 
 def cook_pp(
-    pp_df: pd.DataFrame, *, pre_or_post: str, ws_bin_width: float, rated_power: float, clip_to_rated: bool = True
+    pp_df: pd.DataFrame, *, pre_or_post: str, ws_bin_width: float, rated_power: float, clip_to_rated: bool
 ) -> pd.DataFrame:
     pp_df = pp_df.copy()
 
@@ -89,12 +89,10 @@ def cook_pp(
     rated_ws = pp_df.loc[pp_df[raw_pw_col] >= rated_power * 0.995, "bin_mid"].min() + 1
     empty_rated_bins_fill_value = rated_power
     if not clip_to_rated:
-        try:
+        with contextlib.suppress(IndexError):
             empty_rated_bins_fill_value = pp_df.loc[
                 (pp_df["bin_mid"] >= rated_ws) & ~pp_df[pw_col].isna(), pw_col
             ].iloc[-1]
-        except IndexError:
-            pass
     pp_df.loc[(pp_df["bin_mid"] >= rated_ws) & pp_df[pw_col].isna(), pw_col] = empty_rated_bins_fill_value
     pp_df[pw_sem_col] = pp_df[pw_sem_col].ffill()
 
@@ -181,14 +179,18 @@ def pre_post_pp_analysis(
     )
 
     pre_pp_df = cook_pp(
-        pp_df=pre_pp_df, pre_or_post="pre", ws_bin_width=cfg.ws_bin_width, rated_power=rated_power, clip_to_rated=False
+        pp_df=pre_pp_df,
+        pre_or_post="pre",
+        ws_bin_width=cfg.ws_bin_width,
+        rated_power=rated_power,
+        clip_to_rated=cfg.clip_rated_power_pp,
     )
     post_pp_df = cook_pp(
         pp_df=post_pp_df,
         pre_or_post="post",
         ws_bin_width=cfg.ws_bin_width,
         rated_power=rated_power,
-        clip_to_rated=False,
+        clip_to_rated=cfg.clip_rated_power_pp,
     )
     pp_df = pre_pp_df.merge(
         post_pp_df[[x for x in post_pp_df.columns if x not in pre_pp_df.columns]],