From fcab18a27323d498fbb16497fd324f755b0e8030 Mon Sep 17 00:00:00 2001
From: Samuel Naylor <samuel.naylor@res-group.com>
Date: Tue, 10 Sep 2024 16:27:35 +0100
Subject: [PATCH] refactor: consolidate into functions and ipynb example

Organise code into functions to improve readability and demonstrate
usage in an ipynb notebook
---
 examples/wdw_example.ipynb   | 1287 ++++++++++++++++++++++++++++++++++
 examples/wedowind_example.py |  312 +++++----
 2 files changed, 1480 insertions(+), 119 deletions(-)
 create mode 100644 examples/wdw_example.ipynb
diff --git a/examples/wdw_example.ipynb b/examples/wdw_example.ipynb
new file mode 100644
index 0000000..3599bdc
--- /dev/null
+++ b/examples/wdw_example.ipynb
@@ -0,0 +1,1287 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a16a48dc-14f9-4ed3-98c2-6b7198cdecdf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ruff: noqa: F405"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "8de26d60-249e-46e9-ae5e-d4f6ed38dc2f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "%matplotlib inline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ea667b9b-19e7-455b-909a-1c3bc6e5a2f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from wedowind_example import *  # noqa: F403"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c99e2a22-d632-4f66-8fa8-1294614a657b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Downloading example data from Zenodo\n",
+      "File C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\cache\\wedowind_example_data\\Turbine_Upgrade_Dataset.zip already exists. Skipping download.\n",
+      "File C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\cache\\wedowind_example_data\\Inland_Offshore_Wind_Farm_Dataset1.zip already exists. Skipping download.\n"
+     ]
+    }
+   ],
+   "source": [
+    "download_wdw_data_from_zenodo()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "61992e29-545d-4d81-b5cc-9279517ec9cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "assumed_rated_power_kw = 1500\n",
+    "rotor_diameter_m = 80\n",
+    "cutout_ws_mps = 20\n",
+    "scada_file_name = \"Turbine Upgrade Dataset(Pitch Angle Pair).csv\"  # or Turbine Upgrade Dataset(VG Pair).csv"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7009cf71-3418-435a-a983-ee194453b465",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Preprocessing turbine SCADA data\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>upgrade status</th>\n",
+       "      <th>V</th>\n",
+       "      <th>D</th>\n",
+       "      <th>rho</th>\n",
+       "      <th>S</th>\n",
+       "      <th>I</th>\n",
+       "      <th>normalized_power</th>\n",
+       "      <th>TurbineName</th>\n",
+       "      <th>ActivePowerMean</th>\n",
+       "      <th>WindSpeedMean</th>\n",
+       "      <th>YawAngleMean</th>\n",
+       "      <th>PitchAngleMean</th>\n",
+       "      <th>GenRpmMean</th>\n",
+       "      <th>ShutdownDuration</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>TimeStamp_StartFormat</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2010-07-30 22:40:00+00:00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>7.96</td>\n",
+       "      <td>138.9</td>\n",
+       "      <td>1.140224</td>\n",
+       "      <td>0.266512</td>\n",
+       "      <td>0.090452</td>\n",
+       "      <td>0.393152</td>\n",
+       "      <td>Test</td>\n",
+       "      <td>589.727273</td>\n",
+       "      <td>7.96</td>\n",
+       "      <td>138.9</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1000</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-30 22:50:00+00:00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>8.19</td>\n",
+       "      <td>140.6</td>\n",
+       "      <td>1.140522</td>\n",
+       "      <td>0.286167</td>\n",
+       "      <td>0.083028</td>\n",
+       "      <td>0.457455</td>\n",
+       "      <td>Test</td>\n",
+       "      <td>686.181817</td>\n",
+       "      <td>8.19</td>\n",
+       "      <td>140.6</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1000</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-30 23:00:00+00:00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>7.20</td>\n",
+       "      <td>139.3</td>\n",
+       "      <td>1.140771</td>\n",
+       "      <td>0.339321</td>\n",
+       "      <td>0.098611</td>\n",
+       "      <td>0.382121</td>\n",
+       "      <td>Test</td>\n",
+       "      <td>573.181818</td>\n",
+       "      <td>7.20</td>\n",
+       "      <td>139.3</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1000</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-30 23:10:00+00:00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>6.81</td>\n",
+       "      <td>137.4</td>\n",
+       "      <td>1.141186</td>\n",
+       "      <td>0.375815</td>\n",
+       "      <td>0.101322</td>\n",
+       "      <td>0.282182</td>\n",
+       "      <td>Test</td>\n",
+       "      <td>423.272727</td>\n",
+       "      <td>6.81</td>\n",
+       "      <td>137.4</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1000</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-30 23:20:00+00:00</th>\n",
+       "      <td>0</td>\n",
+       "      <td>5.09</td>\n",
+       "      <td>137.5</td>\n",
+       "      <td>1.141464</td>\n",
+       "      <td>0.303472</td>\n",
+       "      <td>0.165029</td>\n",
+       "      <td>0.127212</td>\n",
+       "      <td>Test</td>\n",
+       "      <td>190.818182</td>\n",
+       "      <td>5.09</td>\n",
+       "      <td>137.5</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1000</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                           upgrade status     V      D       rho         S  \\\n",
+       "TimeStamp_StartFormat                                                        \n",
+       "2010-07-30 22:40:00+00:00               0  7.96  138.9  1.140224  0.266512   \n",
+       "2010-07-30 22:50:00+00:00               0  8.19  140.6  1.140522  0.286167   \n",
+       "2010-07-30 23:00:00+00:00               0  7.20  139.3  1.140771  0.339321   \n",
+       "2010-07-30 23:10:00+00:00               0  6.81  137.4  1.141186  0.375815   \n",
+       "2010-07-30 23:20:00+00:00               0  5.09  137.5  1.141464  0.303472   \n",
+       "\n",
+       "                                  I  normalized_power TurbineName  \\\n",
+       "TimeStamp_StartFormat                                               \n",
+       "2010-07-30 22:40:00+00:00  0.090452          0.393152        Test   \n",
+       "2010-07-30 22:50:00+00:00  0.083028          0.457455        Test   \n",
+       "2010-07-30 23:00:00+00:00  0.098611          0.382121        Test   \n",
+       "2010-07-30 23:10:00+00:00  0.101322          0.282182        Test   \n",
+       "2010-07-30 23:20:00+00:00  0.165029          0.127212        Test   \n",
+       "\n",
+       "                           ActivePowerMean  WindSpeedMean  YawAngleMean  \\\n",
+       "TimeStamp_StartFormat                                                     \n",
+       "2010-07-30 22:40:00+00:00       589.727273           7.96         138.9   \n",
+       "2010-07-30 22:50:00+00:00       686.181817           8.19         140.6   \n",
+       "2010-07-30 23:00:00+00:00       573.181818           7.20         139.3   \n",
+       "2010-07-30 23:10:00+00:00       423.272727           6.81         137.4   \n",
+       "2010-07-30 23:20:00+00:00       190.818182           5.09         137.5   \n",
+       "\n",
+       "                           PitchAngleMean  GenRpmMean  ShutdownDuration  \n",
+       "TimeStamp_StartFormat                                                    \n",
+       "2010-07-30 22:40:00+00:00               0        1000                 0  \n",
+       "2010-07-30 22:50:00+00:00               0        1000                 0  \n",
+       "2010-07-30 23:00:00+00:00               0        1000                 0  \n",
+       "2010-07-30 23:10:00+00:00               0        1000                 0  \n",
+       "2010-07-30 23:20:00+00:00               0        1000                 0  "
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "logger.info(\"Preprocessing turbine SCADA data\")\n",
+    "scada_df = WDWScadaUnpacker(scada_file_name=scada_file_name).unpack(rated_power_kw=assumed_rated_power_kw)\n",
+    "scada_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "c0922049-6159-41d8-ba4b-94affe82b296",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Latitude</th>\n",
+       "      <th>Longitude</th>\n",
+       "      <th>TimeZone</th>\n",
+       "      <th>TimeSpanMinutes</th>\n",
+       "      <th>TimeFormat</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>WT1</td>\n",
+       "      <td>40.036394</td>\n",
+       "      <td>-89.052141</td>\n",
+       "      <td>UTC</td>\n",
+       "      <td>10</td>\n",
+       "      <td>Start</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>WT2</td>\n",
+       "      <td>40.039089</td>\n",
+       "      <td>-89.032205</td>\n",
+       "      <td>UTC</td>\n",
+       "      <td>10</td>\n",
+       "      <td>Start</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>WT3</td>\n",
+       "      <td>39.954324</td>\n",
+       "      <td>-88.942660</td>\n",
+       "      <td>UTC</td>\n",
+       "      <td>10</td>\n",
+       "      <td>Start</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>WT4</td>\n",
+       "      <td>39.972739</td>\n",
+       "      <td>-88.969221</td>\n",
+       "      <td>UTC</td>\n",
+       "      <td>10</td>\n",
+       "      <td>Start</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>MAST1</td>\n",
+       "      <td>40.042682</td>\n",
+       "      <td>-89.058004</td>\n",
+       "      <td>UTC</td>\n",
+       "      <td>10</td>\n",
+       "      <td>Start</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    Name   Latitude  Longitude TimeZone  TimeSpanMinutes TimeFormat\n",
+       "0    WT1  40.036394 -89.052141      UTC               10      Start\n",
+       "1    WT2  40.039089 -89.032205      UTC               10      Start\n",
+       "2    WT3  39.954324 -88.942660      UTC               10      Start\n",
+       "3    WT4  39.972739 -88.969221      UTC               10      Start\n",
+       "4  MAST1  40.042682 -89.058004      UTC               10      Start"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "metadata_df = make_wdw_metadata_df()\n",
+    "metadata_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "3734d8fd-e4c8-4e5e-820d-a95589998c77",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Custom plots saved to directory: C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\output\\wedowind_example\\custom_plots\n",
+      "Custom plots saved to directory: C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\output\\wedowind_example\\custom_plots\n"
+     ]
+    }
+   ],
+   "source": [
+    "run_custom_plots(scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "774b80d0-476f-4a86-a774-b6aca2462181",
+   "metadata": {},
+   "source": [
+    "Amend dataframes based on reviewing the Custom Plots."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "3685f588-060d-4209-aadc-5f87f606887e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# based on the custom plots above, the objects appear to be: mast=MAST1, test=WT1 and ref=WT2\n",
+    "scada_df = scada_df.replace(\n",
+    "    {\"TurbineName\": {TurbineNames.TEST.value: \"WT1\", TurbineNames.REF.value: \"WT2\", \"Mast\": \"MAST1\"}}\n",
+    ")\n",
+    "# drop everything except the turbines from the metadata\n",
+    "metadata_df = metadata_df[metadata_df[\"Name\"].isin([\"WT1\", \"WT2\"])]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a346cc79-1012-4d24-b340-ae4115f34fe4",
+   "metadata": {},
+   "source": [
+    "# Construct Reanalysis Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "47ed234d-5c55-4165-902f-f808408f82a1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>100_m_hws_mean_mps</th>\n",
+       "      <th>100_m_hwd_mean_deg-n_true</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2010-07-30 22:40:00+00:00</th>\n",
+       "      <td>8.184808</td>\n",
+       "      <td>172.795653</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-30 23:40:00+00:00</th>\n",
+       "      <td>6.348934</td>\n",
+       "      <td>83.654251</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-31 00:40:00+00:00</th>\n",
+       "      <td>5.204868</td>\n",
+       "      <td>288.677008</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-31 01:40:00+00:00</th>\n",
+       "      <td>5.082638</td>\n",
+       "      <td>332.470858</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2010-07-31 02:40:00+00:00</th>\n",
+       "      <td>9.066351</td>\n",
+       "      <td>95.806898</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                           100_m_hws_mean_mps  100_m_hwd_mean_deg-n_true\n",
+       "2010-07-30 22:40:00+00:00            8.184808                 172.795653\n",
+       "2010-07-30 23:40:00+00:00            6.348934                  83.654251\n",
+       "2010-07-31 00:40:00+00:00            5.204868                 288.677008\n",
+       "2010-07-31 01:40:00+00:00            5.082638                 332.470858\n",
+       "2010-07-31 02:40:00+00:00            9.066351                  95.806898"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from wind_up.reanalysis_data import ReanalysisDataset\n",
+    "\n",
+    "rng = np.random.default_rng(0)\n",
+    "rows = 100\n",
+    "reanalysis_dataset = ReanalysisDataset(\n",
+    "    id=\"dummy_reanalysis_data\",\n",
+    "    data=pd.DataFrame(\n",
+    "        data={\n",
+    "            \"100_m_hws_mean_mps\": rng.uniform(5, 10, rows),\n",
+    "            \"100_m_hwd_mean_deg-n_true\": rng.uniform(0, 360, rows),\n",
+    "        },\n",
+    "        index=pd.DatetimeIndex(pd.date_range(start=scada_df.index.min(), periods=rows, freq=\"h\", tz=\"UTC\")),\n",
+    "    ),\n",
+    ")\n",
+    "\n",
+    "reanalysis_dataset.data.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d989a0a1-4b3f-4e9d-b3f0-fa4061111616",
+   "metadata": {},
+   "source": [
+    "# Construct `wind-up` Configuration"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6aa9d215-e58b-409b-a1ab-bb5eb1739ebf",
+   "metadata": {},
+   "source": [
+    "## Wind Farm Config"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "63e01414-8585-4cf8-9169-c3229eaa1f2e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "loaded WindUpConfig assessment_name: wedowind_example\n",
+      "loaded WindUpConfig assessment_name: wedowind_example\n",
+      "loaded WindUpConfig assessment_name: wedowind_example\n",
+      "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
+      "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
+      "pre analysis period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
+      "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n",
+      "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n",
+      "post analysis period (UTC): 2011-04-25 21:50 to 2011-06-25 18:40\n",
+      "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
+      "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
+      "long term period (UTC): 2010-07-30 22:40 to 2010-09-29 19:20\n",
+      "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n",
+      "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n",
+      "detrend period (UTC): 2010-07-30 22:40 to 2011-04-18 21:50\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "WindUpConfig(assessment_name='wedowind_example', timebase_s=600, ignore_turbine_anemometer_data=False, require_test_wake_free=False, require_ref_wake_free=False, detrend_min_hours=24, ref_wd_filter=[150.0, 240.0], ref_hod_filter=None, filter_all_test_wtgs_together=False, use_lt_distribution=False, use_test_wtg_lt_distribution=True, out_dir=WindowsPath('C:/Users/snaylor/Documents/GitHub/wind-up/output/wedowind_example'), test_wtgs=[Turbine(name='WT1', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], ref_wtgs=[Turbine(name='WT2', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], ref_super_wtgs=[], non_wtg_ref_names=[], upgrade_first_dt_utc_start=Timestamp('2011-04-25 21:50:00+0000', tz='UTC'), analysis_last_dt_utc_start=Timestamp('2011-06-25 18:30:00+0000', tz='UTC'), analysis_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), lt_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), lt_last_dt_utc_start=Timestamp('2010-09-29 19:10:00+0000', tz='UTC'), detrend_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), detrend_last_dt_utc_start=Timestamp('2011-04-18 21:40:00+0000', tz='UTC'), years_offset_for_pre_period=1, years_for_lt_distribution=1, years_for_detrend=1, ws_bin_width=1.0, bootstrap_runs_override=None, reanalysis_method='node_with_best_ws_corr', missing_scada_data_fields=['YawAngleMin', 'YawAngleMax'], asset=Asset(name='Mystery Wind Farm', wtgs=[Turbine(name='WT1', turbine_type=TurbineType(turbine_type='unknown 
turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan), Turbine(name='WT2', turbine_type=TurbineType(turbine_type='unknown turbine type', rotor_diameter_m=80.0, rated_power_kw=1500.0, cutout_ws_mps=20.0, normal_operation_pitch_range=(-10.0, 35.0), normal_operation_genrpm_range=(0.0, 2000.0), rpm_v_pw_margin_factor=0.05, pitch_to_stall=False), latitude=nan, longitude=nan)], masts_and_lidars=[]), exclusion_periods_utc=[], yaw_data_exclusions_utc=[], optimize_northing_corrections=False, northing_corrections_utc=[], toggle=None, prepost=PrePost(pre_first_dt_utc_start=Timestamp('2010-07-30 22:40:00+0000', tz='UTC'), pre_last_dt_utc_start=Timestamp('2010-09-29 19:10:00+0000', tz='UTC'), post_first_dt_utc_start=Timestamp('2011-04-25 21:50:00+0000', tz='UTC'), post_last_dt_utc_start=Timestamp('2011-06-25 18:30:00+0000', tz='UTC')))"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "wtg_map = {\n",
+    "    x: {\n",
+    "        \"name\": x,\n",
+    "        \"turbine_type\": {\n",
+    "            \"turbine_type\": \"unknown turbine type\",\n",
+    "            \"rotor_diameter_m\": rotor_diameter_m,\n",
+    "            \"rated_power_kw\": assumed_rated_power_kw,\n",
+    "            \"cutout_ws_mps\": cutout_ws_mps,\n",
+    "            \"normal_operation_pitch_range\": (-10.0, 35.0),\n",
+    "            \"normal_operation_genrpm_range\": (0, 2000.0),\n",
+    "        },\n",
+    "    }\n",
+    "    for x in [\"WT1\", \"WT2\"]\n",
+    "}\n",
+    "\n",
+    "cfg = WindUpConfig(\n",
+    "    assessment_name=ASSESSMENT_NAME,\n",
+    "    ref_wd_filter=[150, 240],  # apparent wake free sector\n",
+    "    use_lt_distribution=False,\n",
+    "    out_dir=OUTPUT_DIR / ASSESSMENT_NAME,\n",
+    "    test_wtgs=[wtg_map[x] for x in [\"WT1\"]],\n",
+    "    ref_wtgs=[wtg_map[x] for x in [\"WT2\"]],\n",
+    "    analysis_first_dt_utc_start=scada_df.index.min(),\n",
+    "    upgrade_first_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.min(),\n",
+    "    analysis_last_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.max(),\n",
+    "    years_offset_for_pre_period=1,\n",
+    "    lt_first_dt_utc_start=scada_df.index.min(),\n",
+    "    lt_last_dt_utc_start=scada_df.index.min()\n",
+    "    + (scada_df[scada_df[\"upgrade status\"] > 0].index.max() - scada_df[scada_df[\"upgrade status\"] > 0].index.min())\n",
+    "    - pd.Timedelta(minutes=10),\n",
+    "    detrend_first_dt_utc_start=scada_df.index.min(),\n",
+    "    detrend_last_dt_utc_start=scada_df[scada_df[\"upgrade status\"] > 0].index.min()\n",
+    "    - pd.DateOffset(weeks=1)\n",
+    "    - pd.Timedelta(minutes=10),\n",
+    "    years_for_lt_distribution=1,\n",
+    "    years_for_detrend=1,\n",
+    "    ws_bin_width=1.0,\n",
+    "    asset={\n",
+    "        \"name\": \"Mystery Wind Farm\",\n",
+    "        \"wtgs\": list(wtg_map.values()),\n",
+    "    },\n",
+    "    missing_scada_data_fields=[\"YawAngleMin\", \"YawAngleMax\"],\n",
+    "    prepost={\n",
+    "        \"pre_first_dt_utc_start\": scada_df.index.min(),\n",
+    "        \"pre_last_dt_utc_start\": scada_df.index.min()\n",
+    "        + (scada_df[scada_df[\"upgrade status\"] > 0].index.max() - scada_df[scada_df[\"upgrade status\"] > 0].index.min())\n",
+    "        - pd.Timedelta(minutes=10),\n",
+    "        \"post_first_dt_utc_start\": scada_df[scada_df[\"upgrade status\"] > 0].index.min(),\n",
+    "        \"post_last_dt_utc_start\": scada_df[scada_df[\"upgrade status\"] > 0].index.max(),\n",
+    "    },\n",
+    "    optimize_northing_corrections=False,\n",
+    ")\n",
+    "\n",
+    "cfg"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "95dd5c37-0945-4384-9019-34b0dab1ce0b",
+   "metadata": {},
+   "source": [
+    "## Plot Configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "9785a31a-ed13-4249-beec-36e91d5458f6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / \"plots\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a4944d52-3e0d-4159-a6a7-7bdf4d6b5838",
+   "metadata": {},
+   "source": [
+    "## Assessment Configs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "092491ca-55be-480e-9494-0cb919c5dae6",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "running wind_up analysis for wedowind_example\n",
+      "running wind_up analysis for wedowind_example\n",
+      "running wind_up analysis for wedowind_example\n",
+      "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
+      "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
+      "running load_smart_scada_and_md_from_file for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
+      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\wind_up\\smart_data.py:99: SettingWithCopyWarning: \n",
+      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
+      "Try using .loc[row_indexer,col_indexer] = value instead\n",
+      "\n",
+      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
+      "  scada_raw[\"TurbineName\"] = scada_raw[\"TurbineName\"].astype(\"category\")\n",
+      "loaded 2 turbines, 0.5 years per turbine\n",
+      "loaded 2 turbines, 0.5 years per turbine\n",
+      "loaded 2 turbines, 0.5 years per turbine\n",
+      "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
+      "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
+      "finished load_smart_scada_and_md for 2010-07-30 22:40:00+00:00 to 2011-06-25 18:30:00+00:00\n",
+      "average capacity factor: 52.9%\n",
+      "average capacity factor: 52.9%\n",
+      "average capacity factor: 52.9%\n",
+      "top 3 capacity factor [%]:\n",
+      "+-----+------+\n",
+      "| WT2 | 53.3 |\n",
+      "| WT1 | 52.4 |\n",
+      "+-----+------+\n",
+      "top 3 capacity factor [%]:\n",
+      "+-----+------+\n",
+      "| WT2 | 53.3 |\n",
+      "| WT1 | 52.4 |\n",
+      "+-----+------+\n",
+      "top 3 capacity factor [%]:\n",
+      "+-----+------+\n",
+      "| WT2 | 53.3 |\n",
+      "| WT1 | 52.4 |\n",
+      "+-----+------+\n",
+      "bottom 3 capacity factor [%]:\n",
+      "+-----+------+\n",
+      "| WT1 | 52.4 |\n",
+      "| WT2 | 53.3 |\n",
+      "+-----+------+\n",
+      "bottom 3 capacity factor [%]:\n",
+      "+-----+------+\n",
+      "| WT1 | 52.4 |\n",
+      "| WT2 | 53.3 |\n",
+      "+-----+------+\n",
+      "bottom 3 capacity factor [%]:\n",
+      "+-----+------+\n",
+      "| WT1 | 52.4 |\n",
+      "| WT2 | 53.3 |\n",
+      "+-----+------+\n",
+      "0 rows [0.0%] of power data is missing before filtering\n",
+      "0 rows [0.0%] of power data is missing before filtering\n",
+      "0 rows [0.0%] of power data is missing before filtering\n",
+      "filter_stuck_data set 0 rows [0.0%] to NA\n",
+      "filter_stuck_data set 0 rows [0.0%] to NA\n",
+      "filter_stuck_data set 0 rows [0.0%] to NA\n",
+      "filter_bad_pw_ws set 0 rows [0.0%] to NA\n",
+      "filter_bad_pw_ws set 0 rows [0.0%] to NA\n",
+      "filter_bad_pw_ws set 0 rows [0.0%] to NA\n",
+      "filter_exclusions set 0 rows [0.0%] to NA\n",
+      "filter_exclusions set 0 rows [0.0%] to NA\n",
+      "filter_exclusions set 0 rows [0.0%] to NA\n",
+      "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n",
+      "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n",
+      "filter_yaw_exclusions set 0 rows [0.0%] to NA yaw\n",
+      "filter_downtime set 0 rows [0.0%] to NA\n",
+      "filter_downtime set 0 rows [0.0%] to NA\n",
+      "filter_downtime set 0 rows [0.0%] to NA\n",
+      "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n",
+      "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n",
+      "filter_missing_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are NA\n",
+      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n",
+      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n",
+      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch are out of range\n",
+      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n",
+      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n",
+      "filter_rpm_and_pt set 0 rows [0.0%] to NA because of rpm or pitch curve filtering\n",
+      "0 rows [0.0%] of power data is missing after filtering\n",
+      "0 rows [0.0%] of power data is missing after filtering\n",
+      "0 rows [0.0%] of power data is missing after filtering\n",
+      "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n",
+      "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n",
+      "dummy_reanalysis_data best correlation is 0.282501 with a shift of -86\n",
+      "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n",
+      "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n",
+      "dummy_reanalysis_data has best correlation: 0.283 with a shift of -86\n",
+      "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n",
+      "+-----+-----+\n",
+      "| WT1 | nan |\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n",
+      "+-----+-----+\n",
+      "| WT1 | nan |\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 turbines needing northing correction vs reanalysis_wd before northing:\n",
+      "+-----+-----+\n",
+      "| WT1 | nan |\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n",
+      "+-----+-----+\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n",
+      "+-----+-----+\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd before northing:\n",
+      "+-----+-----+\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "no northing corrections to apply\n",
+      "no northing corrections to apply\n",
+      "no northing corrections to apply\n",
+      "applied 0 northing corrections\n",
+      "applied 0 northing corrections\n",
+      "applied 0 northing corrections\n",
+      "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n",
+      "+-----+-----+\n",
+      "| WT1 | nan |\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n",
+      "+-----+-----+\n",
+      "| WT1 | nan |\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 turbines needing northing correction vs reanalysis_wd after northing:\n",
+      "+-----+-----+\n",
+      "| WT1 | nan |\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n",
+      "+-----+-----+\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n",
+      "+-----+-----+\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "top 3 REFERENCE turbines needing northing correction vs reanalysis_wd after northing:\n",
+      "+-----+-----+\n",
+      "| WT2 | nan |\n",
+      "+-----+-----+\n",
+      "estimated rated wind speed = 14.2 m/s\n",
+      "estimated rated wind speed = 14.2 m/s\n",
+      "estimated rated wind speed = 14.2 m/s\n",
+      "estimated cut-in wind speed = 3.5 m/s\n",
+      "estimated cut-in wind speed = 3.5 m/s\n",
+      "estimated cut-in wind speed = 3.5 m/s\n",
+      "##############################################################################\n",
+      "# estimate wind speed from power\n",
+      "##############################################################################\n",
+      "##############################################################################\n",
+      "# estimate wind speed from power\n",
+      "##############################################################################\n",
+      "##############################################################################\n",
+      "# estimate wind speed from power\n",
+      "##############################################################################\n",
+      "WT1 cp correction factor = 0.99\n",
+      "WT1 cp correction factor = 0.99\n",
+      "WT1 cp correction factor = 0.99\n",
+      "WT2 cp correction factor = 1.01\n",
+      "WT2 cp correction factor = 1.01\n",
+      "WT2 cp correction factor = 1.01\n",
+      "unknown turbine type 100.0% of rows are waking\n",
+      "unknown turbine type 100.0% of rows are waking\n",
+      "unknown turbine type 100.0% of rows are waking\n",
+      "unknown turbine type 0.0% of rows are not waking\n",
+      "unknown turbine type 0.0% of rows are not waking\n",
+      "unknown turbine type 0.0% of rows are not waking\n",
+      "unknown turbine type 0.0% of rows have unknown or partial waking\n",
+      "unknown turbine type 0.0% of rows have unknown or partial waking\n",
+      "unknown turbine type 0.0% of rows have unknown or partial waking\n"
+     ]
+    }
+   ],
+   "source": [
+    "assessment_inputs = AssessmentInputs.from_cfg(\n",
+    "    cfg=cfg,\n",
+    "    plot_cfg=plot_cfg,\n",
+    "    scada_df=scada_df[(scada_df[\"D\"] < 70) | (scada_df[\"D\"] > 150)],  # filter out apparent mast waked sector  # noqa: PLR2004\n",
+    "    metadata_df=metadata_df,\n",
+    "    reanalysis_datasets=[reanalysis_dataset],\n",
+    "    cache_dir=CACHE_DIR,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5f80aa62-d7cd-4ec5-a28c-3e4ffe2e006d",
+   "metadata": {},
+   "source": [
+    "# Run Analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "d12503f2-49b4-473f-b98f-5cb76b636aba",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "test turbines: ['WT1']\n",
+      "test turbines: ['WT1']\n",
+      "test turbines: ['WT1']\n",
+      "test turbines: ['WT1']\n",
+      "ref list: ['WT2']\n",
+      "ref list: ['WT2']\n",
+      "ref list: ['WT2']\n",
+      "ref list: ['WT2']\n",
+      "turbines to test: ['WT1']\n",
+      "turbines to test: ['WT1']\n",
+      "turbines to test: ['WT1']\n",
+      "turbines to test: ['WT1']\n",
+      "could not calculate rolling windspeed diff\n",
+      "could not calculate rolling windspeed diff\n",
+      "could not calculate rolling windspeed diff\n",
+      "could not calculate rolling windspeed diff\n",
+      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
+      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
+      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
+      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
+      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
+      "WT1 check_for_ops_curve_shift warnings: abs(powercurve_shift) > 0.01: 0.072\n",
+      "analysing WT1 WT2, loop_counter=0\n",
+      "analysing WT1 WT2, loop_counter=0\n",
+      "analysing WT1 WT2, loop_counter=0\n",
+      "analysing WT1 WT2, loop_counter=0\n",
+      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
+      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
+      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
+      "removed 12940 [52.0%] rows from ref_df using ref_wd_filter\n",
+      "could not calculate rolling windspeed diff\n",
+      "could not calculate rolling windspeed diff\n",
+      "could not calculate rolling windspeed diff\n",
+      "could not calculate rolling windspeed diff\n",
+      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
+      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+      "C:\\Users\\snaylor\\Documents\\GitHub\\wind-up\\.venv\\Lib\\site-packages\\numpy\\lib\\_nanfunctions_impl.py:1241: RuntimeWarning: Mean of empty slice\n",
+      "  return np.nanmean(a, axis, out=out, keepdims=keepdims)\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.05 wind_dir=180 ['WT1']\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
+      "calc_iec_upwind_turbines lat=40.04 long=-89.03 wind_dir=180 ['WT2']\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "top 1 WT1 WT2 waking scenarios [%]:\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "+--------------+-------+\n",
+      "| none offline | 100.0 |\n",
+      "+--------------+-------+\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend applied to 91 scenario - directions\n",
+      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "detrend improved pre_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "detrend improved post_df ws r2 by 0.00 (0.99 to 0.99)\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "removed 0 bad detrend results\n",
+      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
+      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
+      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
+      "plot_pre_post_condition_histogram ref_name=WT2 pre_df missing required column ref_AmbientTemp\n",
+      "\n",
+      "results for test=WT1 ref=WT2:\n",
+      "\n",
+      "\n",
+      "results for test=WT1 ref=WT2:\n",
+      "\n",
+      "\n",
+      "results for test=WT1 ref=WT2:\n",
+      "\n",
+      "\n",
+      "results for test=WT1 ref=WT2:\n",
+      "\n",
+      "hours pre = 459.3\n",
+      "hours pre = 459.3\n",
+      "hours pre = 459.3\n",
+      "hours pre = 459.3\n",
+      "hours post = 522.2\n",
+      "hours post = 522.2\n",
+      "hours post = 522.2\n",
+      "hours post = 522.2\n",
+      "\n",
+      "uplift estimate before adjustments = 2.6 %\n",
+      "\n",
+      "uplift estimate before adjustments = 2.6 %\n",
+      "\n",
+      "uplift estimate before adjustments = 2.6 %\n",
+      "\n",
+      "uplift estimate before adjustments = 2.6 %\n",
+      "\n",
+      "power only uplift estimate = 2.3 %\n",
+      "\n",
+      "power only uplift estimate = 2.3 %\n",
+      "\n",
+      "power only uplift estimate = 2.3 %\n",
+      "\n",
+      "power only uplift estimate = 2.3 %\n",
+      "reversed (power only) uplift estimate = 2.6 %\n",
+      "\n",
+      "reversed (power only) uplift estimate = 2.6 %\n",
+      "\n",
+      "reversed (power only) uplift estimate = 2.6 %\n",
+      "\n",
+      "reversed (power only) uplift estimate = 2.6 %\n",
+      "\n",
+      "Running block bootstrapping uncertainty analysis n_samples = 400\n",
+      "Running block bootstrapping uncertainty analysis n_samples = 400\n",
+      "Running block bootstrapping uncertainty analysis n_samples = 400\n",
+      "Running block bootstrapping uncertainty analysis n_samples = 400\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "8b9f3c80d5e94e4e8c49d9c67f12325d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/400 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "block bootstrapping uncertainty analysis results (conf=90%):\n",
+      "  median = 2.7 %\n",
+      "  lower = 1.9 %\n",
+      "  upper = 3.8 %\n",
+      "  unc_one_sigma = 0.6 %\n",
+      "block bootstrapping uncertainty analysis results (conf=90%):\n",
+      "  median = 2.7 %\n",
+      "  lower = 1.9 %\n",
+      "  upper = 3.8 %\n",
+      "  unc_one_sigma = 0.6 %\n",
+      "block bootstrapping uncertainty analysis results (conf=90%):\n",
+      "  median = 2.7 %\n",
+      "  lower = 1.9 %\n",
+      "  upper = 3.8 %\n",
+      "  unc_one_sigma = 0.6 %\n",
+      "block bootstrapping uncertainty analysis results (conf=90%):\n",
+      "  median = 2.7 %\n",
+      "  lower = 1.9 %\n",
+      "  upper = 3.8 %\n",
+      "  unc_one_sigma = 0.6 %\n",
+      "\n",
+      "cat A 1 sigma unc = 0.3 %\n",
+      "\n",
+      "cat A 1 sigma unc = 0.3 %\n",
+      "\n",
+      "cat A 1 sigma unc = 0.3 %\n",
+      "\n",
+      "cat A 1 sigma unc = 0.3 %\n",
+      "abs reversal error / 2 = 0.1 %\n",
+      "abs reversal error / 2 = 0.1 %\n",
+      "abs reversal error / 2 = 0.1 %\n",
+      "abs reversal error / 2 = 0.1 %\n",
+      "bootstrap 1 sigma unc = 0.6 %\n",
+      "bootstrap 1 sigma unc = 0.6 %\n",
+      "bootstrap 1 sigma unc = 0.6 %\n",
+      "bootstrap 1 sigma unc = 0.6 %\n",
+      "missing bins scale factor = 1.000\n",
+      "missing bins scale factor = 1.000\n",
+      "missing bins scale factor = 1.000\n",
+      "missing bins scale factor = 1.000\n",
+      "final 1 sigma unc = 0.6 %\n",
+      "\n",
+      "final 1 sigma unc = 0.6 %\n",
+      "\n",
+      "final 1 sigma unc = 0.6 %\n",
+      "\n",
+      "final 1 sigma unc = 0.6 %\n",
+      "\n",
+      "final uplift estimate = 2.7 %\n",
+      "final uplift estimate = 2.7 %\n",
+      "final uplift estimate = 2.7 %\n",
+      "final uplift estimate = 2.7 %\n",
+      "final P95 uplift estimate = 1.8 %\n",
+      "final P95 uplift estimate = 1.8 %\n",
+      "final P95 uplift estimate = 1.8 %\n",
+      "final P95 uplift estimate = 1.8 %\n",
+      "final P5 uplift estimate = 3.7 %\n",
+      "final P5 uplift estimate = 3.7 %\n",
+      "final P5 uplift estimate = 3.7 %\n",
+      "final P5 uplift estimate = 3.7 %\n",
+      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
+      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
+      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
+      "{'ref': 'WT2', 'ref_ws_col': 'ref_ws_est_blend', 'distance_m': 1727.532358659697, 'bearing_deg': 80.01879303945559, 'ref_max_northing_error_v_reanalysis': np.float64(nan), 'ref_max_northing_error_v_wf': np.float64(nan), 'ref_max_ws_drift': np.float64(nan), 'ref_max_ws_drift_pp_period': np.float64(nan), 'ref_powercurve_shift': np.float64(-0.0072952693256543855), 'ref_rpm_shift': np.float64(0.0), 'ref_pitch_shift': np.float64(0.0), 'detrend_pre_r2_improvement': np.float64(2.1744130036194242e-05), 'detrend_post_r2_improvement': np.float64(4.176406496059215e-05), 'mean_power_pre': np.float64(736.9209528741912), 'mean_power_post': np.float64(886.8971224584385), 'mean_test_yaw_offset_pre': np.float64(0.0), 'mean_test_yaw_offset_post': np.float64(0.0), 'test_ref_warning_counts': 2, 'time_calculated': Timestamp('2024-09-10 14:56:31.754381+0000', tz='UTC'), 'uplift_frc': np.float64(0.02731091991401073), 'unc_one_sigma_frc': np.float64(0.005683971945706012), 't_value_one_sigma': np.float64(1.0001815211442238), 'missing_bins_unc_scale_factor': 1, 'pp_valid_hours_pre': np.float64(459.33333333333337), 'pp_valid_hours_post': np.float64(522.1666666666666), 'pp_valid_hours': np.float64(981.5), 'pp_data_coverage': np.float64(0.12398938858009095), 'pp_invalid_bin_count': np.int64(8), 'uplift_noadj_frc': np.float64(0.026174088035614145), 'unc_one_sigma_noadj_frc': np.float64(0.00267454822464343), 'poweronly_uplift_frc': np.float64(0.023436708877978316), 'reversed_uplift_frc': np.float64(0.02571037263477149), 'reversal_error': np.float64(0.0022736637567931754), 'unc_one_sigma_lowerbound_frc': np.float64(0.0011368318783965877), 'unc_one_sigma_bootstrap_frc': np.float64(0.005683971945706012), 'uplift_p5_frc': np.float64(0.03666022178439568), 'uplift_p95_frc': np.float64(0.017961618043625782), 'wind_up_version': '0.1.9', 'test_wtg': 'WT1', 'test_pw_col': 'test_pw_clipped', 'lt_wtg_hours_raw': 0, 'lt_wtg_hours_filt': 0, 'test_max_ws_drift': np.float64(nan), 
'test_max_ws_drift_pp_period': np.float64(nan), 'test_powercurve_shift': np.float64(0.07162130888044582), 'test_rpm_shift': np.float64(0.0), 'test_pitch_shift': np.float64(0.0), 'preprocess_warning_counts': 0, 'test_warning_counts': 2}\n",
+      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
+      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
+      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
+      "warning summary: preprocess_warning_counts=0, test_warning_counts=2, test_ref_warning_counts=2\n",
+      "finished analysing WT1 WT2\n",
+      "\n",
+      "finished analysing WT1 WT2\n",
+      "\n",
+      "finished analysing WT1 WT2\n",
+      "\n",
+      "finished analysing WT1 WT2\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "0c7901b1-d3dd-4571-b2b7-3849494d058b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>wind_up_version</th>\n",
+       "      <th>time_calculated</th>\n",
+       "      <th>preprocess_warning_counts</th>\n",
+       "      <th>test_warning_counts</th>\n",
+       "      <th>test_ref_warning_counts</th>\n",
+       "      <th>test_wtg</th>\n",
+       "      <th>test_pw_col</th>\n",
+       "      <th>ref</th>\n",
+       "      <th>ref_ws_col</th>\n",
+       "      <th>uplift_frc</th>\n",
+       "      <th>...</th>\n",
+       "      <th>poweronly_uplift_frc</th>\n",
+       "      <th>reversed_uplift_frc</th>\n",
+       "      <th>reversal_error</th>\n",
+       "      <th>lt_wtg_hours_raw</th>\n",
+       "      <th>lt_wtg_hours_filt</th>\n",
+       "      <th>test_max_ws_drift</th>\n",
+       "      <th>test_max_ws_drift_pp_period</th>\n",
+       "      <th>test_powercurve_shift</th>\n",
+       "      <th>test_rpm_shift</th>\n",
+       "      <th>test_pitch_shift</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.1.9</td>\n",
+       "      <td>2024-09-10 14:56:31.754381+00:00</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "      <td>WT1</td>\n",
+       "      <td>test_pw_clipped</td>\n",
+       "      <td>WT2</td>\n",
+       "      <td>ref_ws_est_blend</td>\n",
+       "      <td>0.027311</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.023437</td>\n",
+       "      <td>0.02571</td>\n",
+       "      <td>0.002274</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.071621</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>1 rows × 49 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  wind_up_version                  time_calculated  preprocess_warning_counts  \\\n",
+       "0           0.1.9 2024-09-10 14:56:31.754381+00:00                          0   \n",
+       "\n",
+       "   test_warning_counts  test_ref_warning_counts test_wtg      test_pw_col  \\\n",
+       "0                    2                        2      WT1  test_pw_clipped   \n",
+       "\n",
+       "   ref        ref_ws_col  uplift_frc  ...  poweronly_uplift_frc  \\\n",
+       "0  WT2  ref_ws_est_blend    0.027311  ...              0.023437   \n",
+       "\n",
+       "   reversed_uplift_frc  reversal_error  lt_wtg_hours_raw  lt_wtg_hours_filt  \\\n",
+       "0              0.02571        0.002274                 0                  0   \n",
+       "\n",
+       "   test_max_ws_drift  test_max_ws_drift_pp_period  test_powercurve_shift  \\\n",
+       "0                NaN                          NaN               0.071621   \n",
+       "\n",
+       "   test_rpm_shift  test_pitch_shift  \n",
+       "0             0.0               0.0  \n",
+       "\n",
+       "[1 rows x 49 columns]"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results_per_test_ref_df"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/wedowind_example.py b/examples/wedowind_example.py
index d82a9b0..16f69f5 100644
--- a/examples/wedowind_example.py
+++ b/examples/wedowind_example.py
@@ -4,6 +4,7 @@
 import math
 import sys
 import zipfile
+from enum import Enum
 from pathlib import Path
 
 import numpy as np
@@ -14,6 +15,7 @@
 from wind_up.interface import AssessmentInputs
 from wind_up.main_analysis import run_wind_up_analysis
 from wind_up.models import PlotConfig, WindUpConfig
+from wind_up.reanalysis_data import ReanalysisDataset
 from wind_up.wind_funcs import calc_cp
 
 sys.path.append(str(PROJECTROOT_DIR))
@@ -27,43 +29,88 @@
 PARENT_DIR = Path(__file__).parent
 ZIP_FILENAME = "Turbine_Upgrade_Dataset.zip"
 
+setup_logger(ANALYSIS_OUTPUT_DIR / "analysis.log")
+logger = logging.getLogger(__name__)
 
-def unpack_wedowind_scada(rated_power_kw: float, filename: str) -> pd.DataFrame:
-    with zipfile.ZipFile(CACHE_DIR / ZIP_FILENAME) as zf:
-        scada_df_raw = pd.read_csv(zf.open(filename), parse_dates=[1], index_col=0).drop(columns=["VcosD", "VsinD"])
-    scada_df_test = (
-        scada_df_raw.drop(columns=["y_ctrl(normalized)"])
-        .copy()
-        .assign(TurbineName="Test")
-        .rename(columns={"y_test(normalized)": "normalized_power"})
-    )
-    scada_df_ref = (
-        scada_df_raw.drop(columns=["y_test(normalized)"])
-        .copy()
-        .assign(TurbineName="Ref")
-        .rename(columns={"y_ctrl(normalized)": "normalized_power"})
-    )
-    scada_df = pd.concat([scada_df_test, scada_df_ref])
-    scada_df[DataColumns.active_power_mean] = scada_df["normalized_power"] * rated_power_kw
-    # map some mast data to the turbine for convenience
-    scada_df[DataColumns.wind_speed_mean] = scada_df["V"]
-    scada_df[DataColumns.yaw_angle_mean] = scada_df["D"]
-    # placeholder values for other required columns
-    scada_df[DataColumns.pitch_angle_mean] = 0
-    scada_df[DataColumns.gen_rpm_mean] = 1000
-    scada_df[DataColumns.shutdown_duration] = 0
-
-    scada_df = scada_df.set_index("time")
-    scada_df.index.name = TIMESTAMP_COL
-    # make index UTC
-    scada_df.index = scada_df.index.tz_localize("UTC")
-    return scada_df
-
-
-def make_wedowind_metadata_df() -> pd.DataFrame:
+
+class WeDoWindScadaColumns(Enum):
+    Y_CTRL_NORM = "y_ctrl(normalized)"
+    Y_TEST_NORM = "y_test(normalized)"
+
+
+class TurbineNames(Enum):
+    REF = "Ref"
+    TEST = "Test"
+
+
+class MetadataColumns(Enum):
+    NAME = "Name"
+    LATITUDE = "Latitude"
+    LONGITUDE = "Longitude"
+
+
+class WDWScadaUnpacker:
+    def __init__(self, scada_file_name: str, wdw_zip_file_path: Path = CACHE_DIR / ZIP_FILENAME) -> None:
+        self.scada_file_name = scada_file_name
+        self.wdw_zip_file_path = wdw_zip_file_path
+        self.scada_df = None
+
+    def unpack(self, rated_power_kw: float) -> pd.DataFrame:
+        if self.scada_df is None:
+            raw_df = self._read_raw_df()
+            scada_df_test = self._construct_scada_df_test(scada_df_raw=raw_df)
+            scada_df_ref = self._construct_scada_df_ref(scada_df_raw=raw_df)
+            self.scada_df = self._format_scada_df(
+                scada_df=pd.concat([scada_df_test, scada_df_ref]), rated_power_kw=rated_power_kw
+            )
+        return self.scada_df
+
+    def _read_raw_df(self) -> pd.DataFrame:
+        with zipfile.ZipFile(self.wdw_zip_file_path) as zf:
+            return pd.read_csv(zf.open(self.scada_file_name), parse_dates=[1], index_col=0).drop(
+                columns=["VcosD", "VsinD"]
+            )
+
+    @staticmethod
+    def _format_scada_df(scada_df: pd.DataFrame, rated_power_kw: float) -> pd.DataFrame:
+        scada_df[DataColumns.active_power_mean] = scada_df["normalized_power"] * rated_power_kw
+        # map some mast data to the turbine for convenience
+        scada_df[DataColumns.wind_speed_mean] = scada_df["V"]
+        scada_df[DataColumns.yaw_angle_mean] = scada_df["D"]
+        # placeholder values for other required columns
+        scada_df[DataColumns.pitch_angle_mean] = 0
+        scada_df[DataColumns.gen_rpm_mean] = 1000
+        scada_df[DataColumns.shutdown_duration] = 0
+
+        scada_df = scada_df.set_index("time")
+        scada_df.index.name = TIMESTAMP_COL
+        # make index UTC
+        scada_df.index = scada_df.index.tz_localize("UTC")
+        return scada_df
+
+    @staticmethod
+    def _construct_scada_df_test(scada_df_raw: pd.DataFrame) -> pd.DataFrame:
+        return (
+            scada_df_raw.drop(columns=[WeDoWindScadaColumns.Y_CTRL_NORM.value])
+            .copy()
+            .assign(TurbineName=TurbineNames.TEST.value)
+            .rename(columns={WeDoWindScadaColumns.Y_TEST_NORM.value: "normalized_power"})
+        )
+
+    @staticmethod
+    def _construct_scada_df_ref(scada_df_raw: pd.DataFrame) -> pd.DataFrame:
+        return (
+            scada_df_raw.drop(columns=[WeDoWindScadaColumns.Y_TEST_NORM.value])
+            .copy()
+            .assign(TurbineName=TurbineNames.REF.value)
+            .rename(columns={WeDoWindScadaColumns.Y_CTRL_NORM.value: "normalized_power"})
+        )
+
+
+def make_wdw_metadata_df() -> pd.DataFrame:
     coords_df = pd.DataFrame(
         {
-            "Name": ["WT1", "WT2", "WT3", "WT4", "MAST1", "MAST2"],
+            MetadataColumns.NAME.value: ["WT1", "WT2", "WT3", "WT4", "MAST1", "MAST2"],
             "X": [500, 2200, 9836, 7571, 0, 9571],
             "Y": [9136, 9436, 0, 2050, 9836, 50],
         }
@@ -71,112 +118,131 @@ def make_wedowind_metadata_df() -> pd.DataFrame:
     assumed_wf_lat = 40
     assumed_wf_lon = -89
     m_per_deglat = 40_075_000 / 360
-    coords_df["Latitude"] = assumed_wf_lat + (coords_df["Y"] - coords_df["Y"].mean()) / m_per_deglat
-    coords_df["Longitude"] = assumed_wf_lon + (coords_df["X"] - coords_df["X"].mean()) / (
+    coords_df[MetadataColumns.LATITUDE.value] = assumed_wf_lat + (coords_df["Y"] - coords_df["Y"].mean()) / m_per_deglat
+    coords_df[MetadataColumns.LONGITUDE.value] = assumed_wf_lon + (coords_df["X"] - coords_df["X"].mean()) / (
         m_per_deglat * math.cos(assumed_wf_lat * math.pi / 180)
     )
-    return coords_df.loc[:, ["Name", "Latitude", "Longitude"]].assign(
-        TimeZone="UTC", TimeSpanMinutes=10, TimeFormat="Start"
-    )
+    return coords_df.loc[
+        :, [MetadataColumns.NAME.value, MetadataColumns.LATITUDE.value, MetadataColumns.LONGITUDE.value]
+    ].assign(TimeZone="UTC", TimeSpanMinutes=10, TimeFormat="Start")
 
 
-if __name__ == "__main__":
-    setup_logger(ANALYSIS_OUTPUT_DIR / "analysis.log")
-    logger = logging.getLogger(__name__)
+def run_custom_plots(scada_df: pd.DataFrame, assumed_rated_power_kw: float, rotor_diameter_m: int) -> Path:
+    """
+    It is unclear how the scada data is related to the metadata so look for wakes in the data
 
-    logger.info("Downloading example data from Zenodo")
-    download_zenodo_data(record_id="5516556", output_dir=CACHE_DIR, filenames={ZIP_FILENAME})
-    download_zenodo_data(
-        record_id="5516552", output_dir=CACHE_DIR, filenames={"Inland_Offshore_Wind_Farm_Dataset1.zip"}
-    )
-    assumed_rated_power_kw = 1500
-    rotor_diameter_m = 80
-    cutout_ws_mps = 20
+    Returns: path to the directory where the plots are saved (plots are not displayed)
 
-    filename = "Turbine Upgrade Dataset(Pitch Angle Pair).csv"  # or Turbine Upgrade Dataset(VG Pair).csv
-    logger.info("Preprocessing turbine SCADA data")
-    scada_df = unpack_wedowind_scada(rated_power_kw=assumed_rated_power_kw, filename=filename)
-    metadata_df = make_wedowind_metadata_df()
-
-    # it is unclear how the scada data is related to the metadata so look for wakes in the data
-    make_custom_plots = True
-    if make_custom_plots:
-        (ANALYSIS_OUTPUT_DIR / "custom_plots").mkdir(exist_ok=True, parents=True)
-        (ANALYSIS_OUTPUT_DIR / "custom_plots" / "timeseries").mkdir(exist_ok=True)
-        for name, df in scada_df.groupby("TurbineName"):
-            for col in df.columns:
-                plt.figure()
-                plt.scatter(df.index, df[col], s=1)
-                title = f"{name} {col}"
-                plt.xlabel(TIMESTAMP_COL)
-                plt.ylabel(col)
-                plt.xticks(rotation=90)
-                plt.grid()
-                plt.tight_layout()
-                plt.savefig(ANALYSIS_OUTPUT_DIR / "custom_plots" / "timeseries" / f"{title}.png")
-                plt.close()
-
-        region2_df = scada_df[(scada_df["normalized_power"] > 0.2) & (scada_df["normalized_power"] < 0.8)]  # noqa PLR2004
-        binned_by_turbine = {}
-        for name, df in region2_df.groupby("TurbineName"):
-            if name == "Mast":
-                continue
-            # find mean normalized_power and V binned by D
-            _df = df.copy()
-            _df["D_bin"] = pd.cut(_df["D"], bins=range(0, 361, 5))
-            binned = _df.groupby("D_bin", observed=False)[["D", "normalized_power", "V"]].mean()
-            binned_by_turbine[name] = binned
+    """
+    custom_plots_dir_root = ANALYSIS_OUTPUT_DIR / "custom_plots"
+    custom_plots_dir_timeseries = custom_plots_dir_root / "timeseries"
+
+    custom_plots_dir_root.mkdir(exist_ok=True, parents=True)
+    custom_plots_dir_timeseries.mkdir(exist_ok=True)
+
+    for name, df in scada_df.groupby("TurbineName"):
+        for col in df.columns:
             plt.figure()
-            plt.plot(
-                binned["D"],
-                calc_cp(
-                    power_kw=binned["normalized_power"] * assumed_rated_power_kw,
-                    ws_ms=binned["V"],
-                    air_density_kgpm3=1.2,
-                    rotor_diameter_m=rotor_diameter_m,
-                ),
-                marker=".",
-            )
-            title = f"{name} Cp vs D"
-            plt.title(title)
-            plt.xlabel("D")
-            plt.ylabel("Cp")
+            plt.scatter(df.index, df[col], s=1)
+            title = f"{name} {col}"
+            plt.xlabel(TIMESTAMP_COL)
+            plt.ylabel(col)
             plt.xticks(rotation=90)
             plt.grid()
             plt.tight_layout()
-            plt.savefig(ANALYSIS_OUTPUT_DIR / "custom_plots" / f"{title}.png")
+            plt.savefig(custom_plots_dir_timeseries / f"{title}.png")
             plt.close()
 
+    region2_df = scada_df[(scada_df["normalized_power"] > 0.2) & (scada_df["normalized_power"] < 0.8)]  # noqa: PLR2004
+
+    binned_by_turbine = {}
+    for name, df in region2_df.groupby("TurbineName"):
+        if name == "Mast":
+            continue
+        # find mean normalized_power and V binned by D
+        _df = df.copy()
+        _df["D_bin"] = pd.cut(_df["D"], bins=range(0, 361, 5))
+        binned = _df.groupby("D_bin", observed=False)[["D", "normalized_power", "V"]].mean()
+        binned_by_turbine[name] = binned
         plt.figure()
-        for name, binned in binned_by_turbine.items():
-            plt.plot(
-                binned["D"],
-                calc_cp(
-                    power_kw=binned["normalized_power"] * assumed_rated_power_kw,
-                    ws_ms=binned["V"],
-                    air_density_kgpm3=1.2,
-                    rotor_diameter_m=rotor_diameter_m,
-                ),
-                label=name,
-                marker=".",
-            )
-        plt.ylim(0.2, 0.7)
-        title = "Cp vs D"
+        plt.plot(
+            binned["D"],
+            calc_cp(
+                power_kw=binned["normalized_power"] * assumed_rated_power_kw,
+                ws_ms=binned["V"],
+                air_density_kgpm3=1.2,
+                rotor_diameter_m=rotor_diameter_m,
+            ),
+            marker=".",
+        )
+        title = f"{name} Cp vs D"
         plt.title(title)
         plt.xlabel("D")
         plt.ylabel("Cp")
         plt.xticks(rotation=90)
         plt.grid()
         plt.tight_layout()
-        plt.savefig(ANALYSIS_OUTPUT_DIR / "custom_plots" / f"{title}.png")
+        plt.savefig(custom_plots_dir_root / f"{title}.png")
         plt.close()
 
+    plt.figure()
+    for name, binned in binned_by_turbine.items():
+        plt.plot(
+            binned["D"],
+            calc_cp(
+                power_kw=binned["normalized_power"] * assumed_rated_power_kw,
+                ws_ms=binned["V"],
+                air_density_kgpm3=1.2,
+                rotor_diameter_m=rotor_diameter_m,
+            ),
+            label=name,
+            marker=".",
+        )
+    plt.ylim(0.2, 0.7)
+    title = "Cp vs D"
+    plt.title(title)
+    plt.xlabel("D")
+    plt.ylabel("Cp")
+    plt.xticks(rotation=90)
+    plt.grid()
+    plt.tight_layout()
+    plt.savefig(custom_plots_dir_root / f"{title}.png")
+    plt.close()
+
+    logger.info("Custom plots saved to directory: %s", custom_plots_dir_root)
+    return custom_plots_dir_root
+
+
+def download_wdw_data_from_zenodo() -> None:
+    logger.info("Downloading example data from Zenodo")
+    download_zenodo_data(record_id="5516556", output_dir=CACHE_DIR, filenames={ZIP_FILENAME})
+    download_zenodo_data(
+        record_id="5516552", output_dir=CACHE_DIR, filenames={"Inland_Offshore_Wind_Farm_Dataset1.zip"}
+    )
+
+
+def main() -> None:
+    download_wdw_data_from_zenodo()
+
+    assumed_rated_power_kw = 1500
+    rotor_diameter_m = 80
+    cutout_ws_mps = 20
+    scada_file_name = "Turbine Upgrade Dataset(Pitch Angle Pair).csv"  # or Turbine Upgrade Dataset(VG Pair).csv
+
+    logger.info("Preprocessing turbine SCADA data")
+    scada_df = WDWScadaUnpacker(scada_file_name=scada_file_name).unpack(rated_power_kw=assumed_rated_power_kw)
+    metadata_df = make_wdw_metadata_df()
+
+    run_custom_plots(
+        scada_df=scada_df, assumed_rated_power_kw=assumed_rated_power_kw, rotor_diameter_m=rotor_diameter_m
+    )
+
     # based on the above I think the objects are MAST1, test=WT1 and ref=WT2
-    scada_df = scada_df.replace({"TurbineName": {"Test": "WT1", "Ref": "WT2", "Mast": "MAST1"}})
+    scada_df = scada_df.replace(
+        {"TurbineName": {TurbineNames.TEST.value: "WT1", TurbineNames.REF.value: "WT2", "Mast": "MAST1"}}
+    )
     # drop everything except the turbines from the metadata
     metadata_df = metadata_df[metadata_df["Name"].isin(["WT1", "WT2"])]
-    # make up reanalysis for now
-    from wind_up.reanalysis_data import ReanalysisDataset
 
     rng = np.random.default_rng(0)
     rows = 100
@@ -191,6 +257,8 @@ def make_wedowind_metadata_df() -> pd.DataFrame:
         ),
     )
 
+    # Construct wind-up Configurations
+
     wtg_map = {
         x: {
             "name": x,
@@ -246,9 +314,9 @@ def make_wedowind_metadata_df() -> pd.DataFrame:
         },
         optimize_northing_corrections=False,
     )
-    msg = f"{cfg.out_dir=}"
-    logger.info(msg)
+
     plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots")
+
     assessment_inputs = AssessmentInputs.from_cfg(
         cfg=cfg,
         plot_cfg=plot_cfg,
@@ -257,4 +325,10 @@ def make_wedowind_metadata_df() -> pd.DataFrame:
         reanalysis_datasets=[reanalysis_dataset],
         cache_dir=CACHE_DIR,
     )
-    results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)
+
+    # Run Analysis
+    results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)  # noqa: F841
+
+
+if __name__ == "__main__":
+    main()

	upgrade status	V	D	rho	S	I	normalized_power	TurbineName	ActivePowerMean	WindSpeedMean	YawAngleMean	PitchAngleMean	GenRpmMean	ShutdownDuration
TimeStamp_StartFormat
2010-07-30 22:40:00+00:00	0	7.96	138.9	1.140224	0.266512	0.090452	0.393152	Test	589.727273	7.96	138.9	0	1000	0
2010-07-30 22:50:00+00:00	0	8.19	140.6	1.140522	0.286167	0.083028	0.457455	Test	686.181817	8.19	140.6	0	1000	0
2010-07-30 23:00:00+00:00	0	7.20	139.3	1.140771	0.339321	0.098611	0.382121	Test	573.181818	7.20	139.3	0	1000	0
2010-07-30 23:10:00+00:00	0	6.81	137.4	1.141186	0.375815	0.101322	0.282182	Test	423.272727	6.81	137.4	0	1000	0
2010-07-30 23:20:00+00:00	0	5.09	137.5	1.141464	0.303472	0.165029	0.127212	Test	190.818182	5.09	137.5	0	1000	0
	Name	Latitude	Longitude	TimeZone	TimeSpanMinutes	TimeFormat
0	WT1	40.036394	-89.052141	UTC	10	Start
1	WT2	40.039089	-89.032205	UTC	10	Start
2	WT3	39.954324	-88.942660	UTC	10	Start
3	WT4	39.972739	-88.969221	UTC	10	Start
4	MAST1	40.042682	-89.058004	UTC	10	Start
	100_m_hws_mean_mps	100_m_hwd_mean_deg-n_true
2010-07-30 22:40:00+00:00	8.184808	172.795653
2010-07-30 23:40:00+00:00	6.348934	83.654251
2010-07-31 00:40:00+00:00	5.204868	288.677008
2010-07-31 01:40:00+00:00	5.082638	332.470858
2010-07-31 02:40:00+00:00	9.066351	95.806898