Skip to content

Commit

Permalink
chore: add facility classification to validation
Browse files (browse the repository at this point in the history)
  • Loading branch information
jacobdadams committed Aug 19, 2024
1 parent d0c1e43 commit b27f99f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 5 deletions.
12 changes: 9 additions & 3 deletions src/wmrc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,9 +453,13 @@ def run_validation():
slice_b = all_changes.columns.slice_indexer("msw_recycling_rate_pct_change", "msw_recycling_rate_diff")
index_c = all_changes.columns.get_loc("msw_recycling_rate_diff") + 1
new_index = all_changes.columns[slice_b].append([all_changes.columns[:index_a], all_changes.columns[index_c:]])
reordered = all_changes.reindex(columns=new_index)
classifications = [val[-1] if isinstance(val, tuple) else None for val in reordered.index]
reordered.insert(0, "classification", classifications)
reordered.index = [val[:-1] if isinstance(val, tuple) else val for val in reordered.index]

wmrc_skid.skid_logger.debug("Writing report to csv...")
all_changes.reindex(columns=new_index).to_csv(report_path)
reordered.to_csv(report_path)

end = datetime.now()

Expand Down Expand Up @@ -510,5 +514,7 @@ def subscribe(cloud_event: CloudEvent) -> None:

#: Putting this here means you can call the file via `python main.py` and it will run. Useful for pre-GCF testing.
if __name__ == "__main__":
wmrc_skid = Skid()
wmrc_skid.process()
# wmrc_skid = Skid()
# wmrc_skid.process()

run_validation()
3 changes: 2 additions & 1 deletion src/wmrc/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def facility_year_over_year(
"facility_id": "id",
"Calendar_Year__c": "data_year",
"Municipal_Solid_Waste__c": "percent_msw",
"Classifications__c": "classification",
}
column_renaming.update({col: col.rstrip("__c") for col in all_facility_records.columns if "_County__c" in col})
all_facility_records_renamed = all_facility_records.rename(columns=column_renaming)
Expand All @@ -86,7 +87,7 @@ def facility_year_over_year(
right_index=True,
)
.reset_index()
.set_index(["data_year", "id", "name"])
.set_index(["data_year", "id", "name", "classification"])
)

return _year_over_year_changes(facility_summary_by_year, current_year)
Expand Down
12 changes: 11 additions & 1 deletion tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import numpy as np
import pandas as pd
import pytest

from wmrc import validate


Expand Down Expand Up @@ -87,6 +86,7 @@ def test_facility_year_over_year_happy_path(self, input_df, expected_output):
{
"facility_id": ["SW01", "SW03", "SW01", "SW03"],
"Calendar_Year__c": [2022, 2022, 2023, 2023],
"Classifications__c": ["Class 1", "Recycling", "Class 1", "Recycling"],
"Municipal_Solid_Waste__c": [10, 50, 100, 100],
"Cache_County__c": [80, 50, 40, 100],
}
Expand All @@ -109,6 +109,11 @@ def test_facility_year_over_year_happy_path(self, input_df, expected_output):
)
expected_output = pd.concat([expected_output, new_output_columns], axis=1)

#: Add the classification level to the index
expected_output.index = pd.MultiIndex.from_tuples(
[("SW01", "foo", "Class 1"), ("SW03", "baz", "Recycling")], names=["id", "name", "classification"]
)

pd.testing.assert_frame_equal(expected_output, output)

def test_facility_year_over_year_switches_record_year_to_int(self, input_df, expected_output):
Expand All @@ -124,6 +129,7 @@ def test_facility_year_over_year_switches_record_year_to_int(self, input_df, exp
{
"facility_id": ["SW01", "SW03", "SW01", "SW03"],
"Calendar_Year__c": ["2022", "2022", "2023", "2023"],
"Classifications__c": ["Class 1", "Recycling", "Class 1", "Recycling"],
"Municipal_Solid_Waste__c": [10, 50, 100, 100],
"Cache_County__c": [80, 50, 40, 100],
}
Expand All @@ -145,6 +151,10 @@ def test_facility_year_over_year_switches_record_year_to_int(self, input_df, exp
index=pd.MultiIndex.from_tuples([("SW01", "foo"), ("SW03", "baz")], names=["id", "name"]),
)
expected_output = pd.concat([expected_output, new_output_columns], axis=1)
#: Add the classification level to the index
expected_output.index = pd.MultiIndex.from_tuples(
[("SW01", "foo", "Class 1"), ("SW03", "baz", "Recycling")], names=["id", "name", "classification"]
)

pd.testing.assert_frame_equal(expected_output, output)

Expand Down

0 comments on commit b27f99f

Please sign in to comment.