smarteole and kelmarsh examples (#1)
* smarteole and kelmarsh examples WIP

* Update kelmarsh_example.py

use exclusion_periods_utc to exclude obviously curtailed times

* update __init__.py and version to 0.1.0

* update tool.setuptools.packages.find

* update __init__.py

* fix __init__.py format

* try to fix CI

* try to fix CI
aclerc authored Apr 24, 2024
1 parent 49d16e3 commit 30c2213
Showing 12 changed files with 537 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yaml
@@ -36,7 +36,7 @@ jobs:
run: |
python -m venv .venv
source .venv/bin/activate
-pip install -r dev-requirements.txt
+pip install -r dev-requirements.txt .
- name: "lint-check & test"
run: |
3 changes: 2 additions & 1 deletion .gitignore
@@ -70,4 +70,5 @@ dmypy.json

.venv/
__pycache__/
.pyc
cache/
output/
2 changes: 1 addition & 1 deletion README.md
@@ -17,7 +17,7 @@ python -m venv .venv
# activate the virtual environment
source .venv/Scripts/activate # or .venv/bin/activate on linux
# install the wind-up package
-pip install -r requirement.txt
+pip install -r requirements.txt
```

See `examples` folder for analysis examples
Binary file not shown.
Binary file not shown.
Empty file added examples/__init__.py
46 changes: 46 additions & 0 deletions examples/helpers.py
@@ -0,0 +1,46 @@
import logging
from collections.abc import Collection
from pathlib import Path

import requests

logger = logging.getLogger(__name__)

BYTES_IN_MB = 1024 * 1024


def download_zenodo_data(
    record_id: str, output_dir: Path, filenames: Collection[str] | None = None, *, cache_overwrite: bool = False
) -> list[Path]:
    output_dir.mkdir(parents=True, exist_ok=True)
    r = requests.get(f"https://zenodo.org/api/records/{record_id}", timeout=10)
    r.raise_for_status()

    files_to_download = r.json()["files"]
    if filenames is not None:
        files_to_download = [i for i in files_to_download if i["key"] in set(filenames)]
        if len(files_to_download) != len(filenames):
            msg = (
                f"Could not find all files in the Zenodo record. "
                f"Missing files: {set(filenames) - {i['key'] for i in files_to_download} }"
            )
            raise ValueError(msg)

    filepaths = []
    for file_to_download in files_to_download:
        dst_fpath = output_dir / file_to_download["key"]
        if not dst_fpath.exists() or cache_overwrite:
            logger.info("Beginning file download from Zenodo...")
            filesize = file_to_download["size"] / BYTES_IN_MB
            result = requests.get(file_to_download["links"]["self"], stream=True, timeout=10)
            chunk_number = 0
            with Path.open(dst_fpath, "wb") as f:
                for chunk in result.iter_content(chunk_size=BYTES_IN_MB):
                    chunk_number = chunk_number + 1
                    print(f"{chunk_number} out of {filesize:.2f} MB downloaded", end="\r")
                    f.write(chunk)
        else:
            logger.info(f"File {dst_fpath} already exists. Skipping download.")
        filepaths.append(dst_fpath)

    return filepaths
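
As a usage illustration, here is a minimal sketch of calling download_zenodo_data from another script; the record ID and filename below are hypothetical placeholders, not values from this commit:

from pathlib import Path

from examples.helpers import download_zenodo_data

# download one named file from a (hypothetical) Zenodo record into a local cache folder,
# skipping the download if the file is already cached
downloaded_paths = download_zenodo_data(
    record_id="1234567",  # hypothetical Zenodo record ID
    output_dir=Path("cache") / "my_example_data",
    filenames={"example_data.csv"},  # hypothetical filename within the record
)
print(downloaded_paths)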
164 changes: 164 additions & 0 deletions examples/kelmarsh_example.py
@@ -0,0 +1,164 @@
import logging
import zipfile
from pathlib import Path

import pandas as pd

from examples.helpers import download_zenodo_data
from wind_up.caching import with_parquet_cache
from wind_up.constants import OUTPUT_DIR, PROJECTROOT_DIR, TIMESTAMP_COL, DataColumns
from wind_up.interface import AssessmentInputs
from wind_up.main_analysis import run_wind_up_analysis
from wind_up.models import PlotConfig, WindUpConfig
from wind_up.reanalysis_data import ReanalysisDataset

logging.basicConfig(level=logging.INFO)

CACHE_FLD = PROJECTROOT_DIR / "cache" / "kelmarsh_example_data"
TURBINE_METADATA_FILENAME = "Kelmarsh_WT_static.csv"
SCADA_DATA_FILENAME = "Kelmarsh_SCADA_2022_4457.zip"
PARENT_DIR = Path(__file__).parent


@with_parquet_cache(CACHE_FLD / "_kelmarsh_scada.parquet")
def _unpack_scada() -> pd.DataFrame:
    turbine_dfs = []

    # unzipping the data in memory and only reading the relevant files
    with zipfile.ZipFile(CACHE_FLD / SCADA_DATA_FILENAME) as zf:
        for inner_file in zf.filelist:
            if not inner_file.filename.startswith("Turbine"):
                continue
            turbine_name = f'KWF{inner_file.filename.split("_")[3]}'
            turbine_dfs.append(pd.read_csv(zf.open(inner_file.filename), skiprows=9).assign(turbine_name=turbine_name))

    # reshaping the turbine data to a single standard dataframe format
    return (
        pd.concat(turbine_dfs)
        .rename(
            columns={
                "Wind speed (m/s)": DataColumns.wind_speed_mean,
                "Wind speed, Standard deviation (m/s)": DataColumns.wind_speed_sd,
                "Power (kW)": DataColumns.active_power_mean,
                "Power, Standard deviation (kW)": DataColumns.active_power_sd,
                "Nacelle ambient temperature (°C)": DataColumns.ambient_temp,
                "Generator RPM (RPM)": DataColumns.gen_rpm_mean,
                "Blade angle (pitch position) A (°)": DataColumns.pitch_angle_mean,
                "Yaw bearing angle (°)": DataColumns.yaw_angle_mean,
                "Yaw bearing angle, Max (°)": DataColumns.yaw_angle_max,
                "Yaw bearing angle, Min (°)": DataColumns.yaw_angle_min,
                "turbine_name": DataColumns.turbine_name,
            }
        )
        .assign(
            **{
                DataColumns.shutdown_duration: lambda d: (1 - d["Time-based System Avail."]) * 600,
                TIMESTAMP_COL: lambda d: pd.to_datetime(d["# Date and time"], utc=True),
            }
        )
        .set_index(TIMESTAMP_COL)
        .loc[:, DataColumns.all()]
    )


@with_parquet_cache(CACHE_FLD / "_kelmarsh_metadata.parquet")
def _unpack_metadata() -> pd.DataFrame:
    md_fpath = CACHE_FLD / "Kelmarsh_WT_static.csv"
    return (
        pd.read_csv(md_fpath, index_col=0)
        .reset_index()
        .rename(columns={"Alternative Title": "Name"})
        .loc[:, ["Name", "Latitude", "Longitude"]]
        .assign(TimeZone="UTC", TimeSpanMinutes=10, TimeFormat="Start")
    )


if __name__ == "__main__":
    download_zenodo_data(
        record_id="8252025",
        output_dir=CACHE_FLD,
        filenames={TURBINE_METADATA_FILENAME, SCADA_DATA_FILENAME},
    )

    turbine_map = {
        row["Alternative Title"]: {
            "name": row["Alternative Title"],
            "turbine_type": {
                "turbine_type": "Senvion-MM92-2050",
                "rotor_diameter_m": 92.0,
                "rated_power_kw": 2050.0,
                "cutout_ws_mps": 25,
                "normal_operation_pitch_range": (-10.0, 35.0),
                "normal_operation_genrpm_range": (250.0, 2000.0),
                "rpm_v_pw_margin_factor": 0.05,
                "pitch_to_stall": False,
            },
        }
        for _, row in pd.read_csv(CACHE_FLD / "Kelmarsh_WT_static.csv").iterrows()
    }

    metadata_df = _unpack_metadata()
    turbine_comb_df = _unpack_scada()

    pre_first_dt_utc_start = turbine_comb_df.index.min().tz_localize("UTC")
    post_last_dt_utc_start = turbine_comb_df.index.max().tz_localize("UTC")
    post_first_dt_utc_start = pre_first_dt_utc_start + (post_last_dt_utc_start - pre_first_dt_utc_start) / 2
    pre_last_dt_utc_start = post_first_dt_utc_start - pd.Timedelta(minutes=10)

    cfg = WindUpConfig(
        assessment_name="kelmarsh-example",
        asset={
            "name": "Kelmarsh",
            "wtgs": list(turbine_map.values()),
        },
        test_wtgs=[turbine_map["KWF1"]],
        ref_wtgs=[turbine_map["KWF2"]],
        out_dir=OUTPUT_DIR / "kelmarsh-example",
        analysis_first_dt_utc_start=pre_first_dt_utc_start,
        upgrade_first_dt_utc_start=post_first_dt_utc_start,
        analysis_last_dt_utc_start=post_last_dt_utc_start,
        lt_first_dt_utc_start=pre_first_dt_utc_start,
        lt_last_dt_utc_start=post_last_dt_utc_start,
        detrend_first_dt_utc_start=pre_first_dt_utc_start,
        detrend_last_dt_utc_start=pre_last_dt_utc_start,
        years_offset_for_pre_period=0,
        years_for_lt_distribution=0,
        years_for_detrend=0,
        ws_bin_width=1.0,
        prepost={
            "pre_first_dt_utc_start": pre_first_dt_utc_start,
            "pre_last_dt_utc_start": pre_last_dt_utc_start,
            "post_first_dt_utc_start": post_first_dt_utc_start,
            "post_last_dt_utc_start": post_last_dt_utc_start,
        },
        optimize_northing_corrections=False,
        northing_corrections_utc=[
            ("KWF1", pd.Timestamp("2022-01-01 00:00:00+0000"), 9.090411376952998),
            ("KWF1", pd.Timestamp("2022-04-23 10:30:00+0000"), 5.755374908447257),
            ("KWF2", pd.Timestamp("2022-01-01 00:00:00+0000"), 4.068655395508082),
            ("KWF3", pd.Timestamp("2022-01-01 00:00:00+0000"), 1.8756744384765625),
            ("KWF4", pd.Timestamp("2022-01-01 00:00:00+0000"), 7.840148925780969),
            ("KWF5", pd.Timestamp("2022-01-01 00:00:00+0000"), 11.577139806747734),
            ("KWF6", pd.Timestamp("2022-01-01 00:00:00+0000"), 4.946038818359088),
        ],
        exclusion_periods_utc=[
            ("ALL", pd.Timestamp("2022-09-30 14:20:00+0000"), pd.Timestamp("2022-09-30 17:50:00+0000")),
            ("ALL", pd.Timestamp("2022-10-05 01:10:00+0000"), pd.Timestamp("2022-10-07 14:30:00+0000")),
        ],
    )
    plot_cfg = PlotConfig(show_plots=False, save_plots=True, plots_dir=cfg.out_dir / "plots")

    reanalysis_dataset = ReanalysisDataset(
        id="ERA5T_52.50N_-1.00E_100m_1hr",
        data=pd.read_parquet(PARENT_DIR / "ERA5T_52.50N_-1.00E_100m_1hr_2022.parquet"),
    )
    assessment_inputs = AssessmentInputs.from_cfg(
        cfg=cfg,
        plot_cfg=plot_cfg,
        scada_df=turbine_comb_df,
        metadata_df=metadata_df,
        toggle_df=None,
        reanalysis_datasets=[reanalysis_dataset],
        cache_dir=CACHE_FLD,
    )
    results_per_test_ref_df = run_wind_up_analysis(assessment_inputs)