-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
v0.2.2
- Loading branch information
Showing
21 changed files
with
912 additions
and
457 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# import packages\n", | ||
"import pandas as pd\n", | ||
"import os\n", | ||
"import zipfile\n", | ||
"\n", | ||
"%reload_ext autoreload\n", | ||
"%autoreload 2\n", | ||
"\n", | ||
"# # Tell python where to look for modules.\n", | ||
"import sys\n", | ||
"sys.path.append('../../../open-grid-emissions/src/')\n", | ||
"\n", | ||
"import load_data\n", | ||
"from column_checks import get_dtypes\n", | ||
"from filepaths import *\n", | ||
"\n", | ||
"\n", | ||
"year = 2021\n", | ||
"path_prefix = f\"{year}/\"" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## About this notebook\n", | ||
"This notebook can be used to identify differences between one version of OGE data and another. \n", | ||
"This is useful if you want to identify how much a code update affects the output results.\n", | ||
"\n", | ||
"This notebook compares files in the `outputs` and `results` directory against archived data in the `zenodo` or `s3_upload` directories. \n", | ||
"This assumes that the previous, stable version of the data outputs are archived on your computer." | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Compare plant data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# load archived data\n", | ||
"data_type = \"plant_data\"\n", | ||
"resolution = \"annual\"\n", | ||
"\n", | ||
"# unzip archived data\n", | ||
"if not os.path.exists(data_folder(\"diff\")):\n", | ||
" os.mkdir(data_folder(\"diff\"))\n", | ||
"with zipfile.ZipFile(data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\") as zip_to_unzip:\n", | ||
" zip_to_unzip.extractall(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\"))\n", | ||
"\n", | ||
"# load archived data\n", | ||
"prev_data = pd.read_csv(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/plant_data.csv\"), dtype=get_dtypes()).round(0)\n", | ||
"\n", | ||
"# load new data\n", | ||
"new_data = pd.read_csv(results_folder(f\"{year}/{data_type}/{resolution}/us_units/plant_data.csv\"), dtype=get_dtypes()).round(0)\n", | ||
"\n", | ||
"# load plant attributes\n", | ||
"plant_attributes = pd.read_csv(outputs_folder(f\"{year}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())\n", | ||
"\n", | ||
"prev_data = prev_data.merge(plant_attributes[[\"plant_id_eia\",\"ba_code\",\"fuel_category\"]], how=\"left\", on=\"plant_id_eia\")\n", | ||
"new_data = new_data.merge(plant_attributes[[\"plant_id_eia\",\"ba_code\",\"fuel_category\"]], how=\"left\", on=\"plant_id_eia\")\n", | ||
"\n", | ||
"key_cols = [\"plant_id_eia\",\"ba_code\",\"fuel_category\"]\n", | ||
"comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n", | ||
"\n", | ||
"# get difference\n", | ||
"diff = comparison.groupby(level=0, axis=1).diff().rename(columns={\"new\":\"pct_diff\"}).drop(columns=[\"previous\"], level=1)\n", | ||
"comparison = pd.concat([comparison, diff], axis=1).sort_index(axis=1, level=0, ascending=True, sort_remaining=False)\n", | ||
"comparison.iloc[:, comparison.columns.get_level_values(1)=='pct_diff'] = (comparison.iloc[:, comparison.columns.get_level_values(1)=='pct_diff'].values / comparison.iloc[:, comparison.columns.get_level_values(1)=='previous'].values).round(2)\n", | ||
"\n", | ||
"comparison\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"comparison[comparison.loc[:,(\"co2_mass_lb_for_electricity\",\"pct_diff\")] > 0.001]#.groupby(\"ba_code\").sum().sum()" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Compare BA data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# load archived data\n", | ||
"data_type = \"power_sector_data\"\n", | ||
"resolution = \"annual\"\n", | ||
"\n", | ||
"# unzip archived data\n", | ||
"if not os.path.exists(data_folder(\"diff\")):\n", | ||
" os.mkdir(data_folder(\"diff\"))\n", | ||
"with zipfile.ZipFile(data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\") as zip_to_unzip:\n", | ||
" zip_to_unzip.extractall(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\"))\n", | ||
"\n", | ||
"# load archived data\n", | ||
"prev_data = []\n", | ||
"for ba in os.listdir(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\")):\n", | ||
" df = pd.read_csv(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/{ba}\"), dtype=get_dtypes())\n", | ||
" df[\"ba_code\"] = ba.split(\".\")[0]\n", | ||
" prev_data.append(df)\n", | ||
"\n", | ||
"prev_data = pd.concat(prev_data, axis=0).reset_index(drop=True)\n", | ||
"\n", | ||
"# load data\n", | ||
"new_data = []\n", | ||
"for ba in os.listdir(results_folder(f\"{year}/{data_type}/{resolution}/us_units\")):\n", | ||
" df = pd.read_csv(results_folder(f\"{year}/{data_type}/{resolution}/us_units/{ba}\"), dtype=get_dtypes())\n", | ||
" df[\"ba_code\"] = ba.split(\".\")[0]\n", | ||
" new_data.append(df)\n", | ||
"\n", | ||
"new_data = pd.concat(new_data, axis=0).reset_index(drop=True)\n", | ||
"\n", | ||
"key_cols = [\"ba_code\", \"fuel_category\"]\n", | ||
"comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n", | ||
"comparison\n" | ||
] | ||
}, | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Compare intermediate outputs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# load archived data\n", | ||
"file = \"cems_cleaned\"\n", | ||
"key_cols = [\"plant_id_eia\",\"emissions_unit_id_epa\",\"datetime_utc\"]\n", | ||
"\n", | ||
"# unzip archived data\n", | ||
"if not os.path.exists(data_folder(f\"diff/outputs_{year}\")):\n", | ||
" os.mkdir(data_folder(f\"diff/outputs_{year}\"))\n", | ||
" with zipfile.ZipFile(data_folder(f\"zenodo/outputs_{year}.zip\"), \"r\") as zip_to_unzip:\n", | ||
" zip_to_unzip.extractall(data_folder(f\"diff/outputs_{year}\"))\n", | ||
"\n", | ||
"# load archived data\n", | ||
"prev_data = pd.read_csv(data_folder(f\"diff/outputs_{year}/{file}_{year}.csv\"), dtype=get_dtypes())\n", | ||
"\n", | ||
"# load new data\n", | ||
"new_data = pd.read_csv(outputs_folder(f\"{year}/{file}_{year}.csv\"), dtype=get_dtypes())\n", | ||
"\n", | ||
"comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n", | ||
"comparison\n" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "open_grid_emissions", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.10.9" | ||
}, | ||
"orig_nbformat": 4, | ||
"vscode": { | ||
"interpreter": { | ||
"hash": "25e36f192ecdbe5da57d9bea009812e7b11ef0e0053366a845a2802aae1b29d2" | ||
} | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.