chore: add facility classification to validation

Author: agrc
Date: Aug 19, 2024
Commit: b27f99f (1 parent: d0c1e43)

Showing 3 changed files with 22 additions and 5 deletions.
diff --git a/src/wmrc/main.py b/src/wmrc/main.py
@@ -453,9 +453,13 @@ def run_validation():
     slice_b = all_changes.columns.slice_indexer("msw_recycling_rate_pct_change", "msw_recycling_rate_diff")
     index_c = all_changes.columns.get_loc("msw_recycling_rate_diff") + 1
     new_index = all_changes.columns[slice_b].append([all_changes.columns[:index_a], all_changes.columns[index_c:]])
+    reordered = all_changes.reindex(columns=new_index)
+    classifications = [val[-1] if isinstance(val, tuple) else None for val in reordered.index]
+    reordered.insert(0, "classification", classifications)
+    reordered.index = [val[:-1] if isinstance(val, tuple) else val for val in reordered.index]
 
     wmrc_skid.skid_logger.debug("Writing report to csv...")
-    all_changes.reindex(columns=new_index).to_csv(report_path)
+    reordered.to_csv(report_path)
 
     end = datetime.now()
 
@@ -510,5 +514,7 @@ def subscribe(cloud_event: CloudEvent) -> None:
 
 #: Putting this here means you can call the file via `python main.py` and it will run. Useful for pre-GCF testing.
 if __name__ == "__main__":
-    wmrc_skid = Skid()
-    wmrc_skid.process()
+    # wmrc_skid = Skid()
+    # wmrc_skid.process()
+
+    run_validation()
diff --git a/src/wmrc/validate.py b/src/wmrc/validate.py
@@ -71,6 +71,7 @@ def facility_year_over_year(
         "facility_id": "id",
         "Calendar_Year__c": "data_year",
         "Municipal_Solid_Waste__c": "percent_msw",
+        "Classifications__c": "classification",
     }
     column_renaming.update({col: col.rstrip("__c") for col in all_facility_records.columns if "_County__c" in col})
     all_facility_records_renamed = all_facility_records.rename(columns=column_renaming)
@@ -86,7 +87,7 @@ def facility_year_over_year(
             right_index=True,
         )
         .reset_index()
-        .set_index(["data_year", "id", "name"])
+        .set_index(["data_year", "id", "name", "classification"])
     )
 
     return _year_over_year_changes(facility_summary_by_year, current_year)

diff --git a/tests/test_validate.py b/tests/test_validate.py
@@ -3,7 +3,6 @@
 import numpy as np
 import pandas as pd
 import pytest
-
 from wmrc import validate
 
 
@@ -87,6 +86,7 @@ def test_facility_year_over_year_happy_path(self, input_df, expected_output):
             {
                 "facility_id": ["SW01", "SW03", "SW01", "SW03"],
                 "Calendar_Year__c": [2022, 2022, 2023, 2023],
+                "Classifications__c": ["Class 1", "Recycling", "Class 1", "Recycling"],
                 "Municipal_Solid_Waste__c": [10, 50, 100, 100],
                 "Cache_County__c": [80, 50, 40, 100],
             }
@@ -109,6 +109,11 @@ def test_facility_year_over_year_happy_path(self, input_df, expected_output):
         )
         expected_output = pd.concat([expected_output, new_output_columns], axis=1)
 
+        #: Add the classification level to the index
+        expected_output.index = pd.MultiIndex.from_tuples(
+            [("SW01", "foo", "Class 1"), ("SW03", "baz", "Recycling")], names=["id", "name", "classification"]
+        )
+
         pd.testing.assert_frame_equal(expected_output, output)
 
     def test_facility_year_over_year_switches_record_year_to_int(self, input_df, expected_output):
@@ -124,6 +129,7 @@ def test_facility_year_over_year_switches_record_year_to_int(self, input_df, exp
             {
                 "facility_id": ["SW01", "SW03", "SW01", "SW03"],
                 "Calendar_Year__c": ["2022", "2022", "2023", "2023"],
+                "Classifications__c": ["Class 1", "Recycling", "Class 1", "Recycling"],
                 "Municipal_Solid_Waste__c": [10, 50, 100, 100],
                 "Cache_County__c": [80, 50, 40, 100],
             }
@@ -145,6 +151,10 @@ def test_facility_year_over_year_switches_record_year_to_int(self, input_df, exp
             index=pd.MultiIndex.from_tuples([("SW01", "foo"), ("SW03", "baz")], names=["id", "name"]),
         )
         expected_output = pd.concat([expected_output, new_output_columns], axis=1)
+        #: Add the classification level to the index
+        expected_output.index = pd.MultiIndex.from_tuples(
+            [("SW01", "foo", "Class 1"), ("SW03", "baz", "Recycling")], names=["id", "name", "classification"]
+        )
 
         pd.testing.assert_frame_equal(expected_output, output)