Skip to content

Commit

Permalink
OGE v.0.2.2
Browse files Browse the repository at this point in the history
v0.2.2
  • Loading branch information
grgmiller authored Mar 2, 2023
2 parents 1477ac5 + f05729c commit bb0c032
Show file tree
Hide file tree
Showing 21 changed files with 912 additions and 457 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ data/*

example/.ipynb_checkpoints/
test/__pycache__/
test/*.txt
src/__pycache__/

CHANGELOG.md

notebooks/visualization/outputs/*

# Python
# Python
notebooks/.ipynb_checkpoints
notebooks/*/.ipynb_checkpoints
.hypothesis/
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ dependencies:
- sqlalchemy
- sqlite # used for pudl
- statsmodels
- coloredlogs # used for prettier logging

- pip:
# --editable ../pudl #NOTE: this is for development use
Expand Down
208 changes: 208 additions & 0 deletions notebooks/validation/diff_output_versions.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import packages\n",
"import pandas as pd\n",
"import os\n",
"import zipfile\n",
"\n",
"%reload_ext autoreload\n",
"%autoreload 2\n",
"\n",
"# # Tell python where to look for modules.\n",
"import sys\n",
"sys.path.append('../../../open-grid-emissions/src/')\n",
"\n",
"import load_data\n",
"from column_checks import get_dtypes\n",
"from filepaths import *\n",
"\n",
"\n",
"year = 2021\n",
"path_prefix = f\"{year}/\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## About this notebook\n",
"This notebook can be used to identify differences between one version of OGE data and another. \n",
"This is useful if you want to identify how much a code update affects the output results.\n",
"\n",
"This notebook compares files in the `outputs` and `results` directory against archived data in the `zenodo` or `s3_upload` directories. \n",
"This assumes that the previous, stable version of the data outputs are archived on your computer."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare plant data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load archived data\n",
"data_type = \"plant_data\"\n",
"resolution = \"annual\"\n",
"\n",
"# unzip archived data\n",
"if not os.path.exists(data_folder(\"diff\")):\n",
" os.mkdir(data_folder(\"diff\"))\n",
"with zipfile.ZipFile(data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\") as zip_to_unzip:\n",
" zip_to_unzip.extractall(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\"))\n",
"\n",
"# load archived data\n",
"prev_data = pd.read_csv(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/plant_data.csv\"), dtype=get_dtypes()).round(0)\n",
"\n",
"# load new data\n",
"new_data = pd.read_csv(results_folder(f\"{year}/{data_type}/{resolution}/us_units/plant_data.csv\"), dtype=get_dtypes()).round(0)\n",
"\n",
"# load plant attributes\n",
"plant_attributes = pd.read_csv(outputs_folder(f\"{year}/plant_static_attributes_{year}.csv\"), dtype=get_dtypes())\n",
"\n",
"prev_data = prev_data.merge(plant_attributes[[\"plant_id_eia\",\"ba_code\",\"fuel_category\"]], how=\"left\", on=\"plant_id_eia\")\n",
"new_data = new_data.merge(plant_attributes[[\"plant_id_eia\",\"ba_code\",\"fuel_category\"]], how=\"left\", on=\"plant_id_eia\")\n",
"\n",
"key_cols = [\"plant_id_eia\",\"ba_code\",\"fuel_category\"]\n",
"comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n",
"\n",
"# get difference\n",
"diff = comparison.groupby(level=0, axis=1).diff().rename(columns={\"new\":\"pct_diff\"}).drop(columns=[\"previous\"], level=1)\n",
"comparison = pd.concat([comparison, diff], axis=1).sort_index(axis=1, level=0, ascending=True, sort_remaining=False)\n",
"comparison.iloc[:, comparison.columns.get_level_values(1)=='pct_diff'] = (comparison.iloc[:, comparison.columns.get_level_values(1)=='pct_diff'].values / comparison.iloc[:, comparison.columns.get_level_values(1)=='previous'].values).round(2)\n",
"\n",
"comparison\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"comparison[comparison.loc[:,(\"co2_mass_lb_for_electricity\",\"pct_diff\")] > 0.001]#.groupby(\"ba_code\").sum().sum()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Compare BA data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load archived data\n",
"data_type = \"power_sector_data\"\n",
"resolution = \"annual\"\n",
"\n",
"# unzip archived data\n",
"if not os.path.exists(data_folder(\"diff\")):\n",
" os.mkdir(data_folder(\"diff\"))\n",
"with zipfile.ZipFile(data_folder(f\"s3_upload/{year}_{data_type}_{resolution}_us_units.zip\"), \"r\") as zip_to_unzip:\n",
" zip_to_unzip.extractall(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\"))\n",
"\n",
"# load archived data\n",
"prev_data = []\n",
"for ba in os.listdir(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units\")):\n",
" df = pd.read_csv(data_folder(f\"diff/{year}_{data_type}_{resolution}_us_units/{ba}\"), dtype=get_dtypes())\n",
" df[\"ba_code\"] = ba.split(\".\")[0]\n",
" prev_data.append(df)\n",
"\n",
"prev_data = pd.concat(prev_data, axis=0).reset_index(drop=True)\n",
"\n",
"# load data\n",
"new_data = []\n",
"for ba in os.listdir(results_folder(f\"{year}/{data_type}/{resolution}/us_units\")):\n",
" df = pd.read_csv(results_folder(f\"{year}/{data_type}/{resolution}/us_units/{ba}\"), dtype=get_dtypes())\n",
" df[\"ba_code\"] = ba.split(\".\")[0]\n",
" new_data.append(df)\n",
"\n",
"new_data = pd.concat(new_data, axis=0).reset_index(drop=True)\n",
"\n",
"key_cols = [\"ba_code\", \"fuel_category\"]\n",
"comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n",
"comparison\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Compare intermediate outputs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load archived data\n",
"file = \"cems_cleaned\"\n",
"key_cols = [\"plant_id_eia\",\"emissions_unit_id_epa\",\"datetime_utc\"]\n",
"\n",
"# unzip archived data\n",
"if not os.path.exists(data_folder(f\"diff/outputs_{year}\")):\n",
" os.mkdir(data_folder(f\"diff/outputs_{year}\"))\n",
" with zipfile.ZipFile(data_folder(f\"zenodo/outputs_{year}.zip\"), \"r\") as zip_to_unzip:\n",
" zip_to_unzip.extractall(data_folder(f\"diff/outputs_{year}\"))\n",
"\n",
"# load archived data\n",
"prev_data = pd.read_csv(data_folder(f\"diff/outputs_{year}/{file}_{year}.csv\"), dtype=get_dtypes())\n",
"\n",
"# load new data\n",
"new_data = pd.read_csv(outputs_folder(f\"{year}/{file}_{year}.csv\"), dtype=get_dtypes())\n",
"\n",
"comparison = prev_data.set_index(key_cols).compare(new_data.set_index(key_cols), result_names=(\"previous\",\"new\"))\n",
"comparison\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "open_grid_emissions",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "25e36f192ecdbe5da57d9bea009812e7b11ef0e0053366a845a2802aae1b29d2"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit bb0c032

Please sign in to comment.