diff --git a/src/wmrc/summarize.py b/src/wmrc/summarize.py index 0771d24..34e3193 100644 --- a/src/wmrc/summarize.py +++ b/src/wmrc/summarize.py @@ -2,6 +2,7 @@ Calender_Year__c to create dataframes of the reports that will be used to update the AGOL feature services. """ +import numpy as np import pandas as pd try: @@ -198,8 +199,9 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series: """Calculates a yearly recovery rate based on the Salesforce records. Recovery rate is opposite of contaminated rate (5% contamination = 95% uncontaminated). Rate is - calculated by using the contamination rate to determine contaminated tonnage and comparing that to the total - tonnage handled by facilities reporting a contamination rate. + computed per year from two per-facility values: the in-state MSW tonnage recycled, and the tonnage received, which + is derived by dividing the recycled tonnage by that facility's recovery rate. The yearly rate is the sum of the + recycled tonnage divided by the sum of the received tonnage across all facilities. 
Args: records (helpers.SalesForceRecords): Helper object containing the Salesforce records @@ -208,34 +210,31 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series: pd.Series: recovery rates per year with index name data_year and series name "annual_recycling_uncontaminated_rate" """ - #: First, create a modifier to account for material from out-of-state + #: Create our various modifiers records.df["in_state_modifier"] = (100 - records.df["Out_of_State__c"]) / 100 + records.df["msw_modifier"] = records.df["Municipal_Solid_Waste__c"] / 100 + records.df["recovery_rate"] = (100 - records.df["Annual_Recycling_Contamination_Rate__c"]) / 100 - #: Calculate contaminated tonnage - records.df["recycling_tons_contaminated"] = ( - records.df["Annual_Recycling_Contamination_Rate__c"] - / 100 - * records.df["Combined_Total_of_Material_Recycled__c"] + #: Amount of material recycled + records.df["in_state_msw_recycled"] = ( + records.df["Combined_Total_of_Material_Recycled__c"] * records.df["in_state_modifier"] + * records.df["msw_modifier"] ) - #: Calculate total tonnage from facilities reporting a contamination rate - records.df["recycling_tons_report_contamination_total"] = pd.NA - records.df.loc[~records.df["recycling_tons_contaminated"].isnull(), "recycling_tons_report_contamination_total"] = ( - records.df["Combined_Total_of_Material_Recycled__c"] * records.df["in_state_modifier"] + #: Amount of material received derived from recovery rate + records.df["in_state_msw_received_for_recycling"] = ( + records.df["in_state_msw_recycled"] / records.df["recovery_rate"] ) - #: Invert to get uncontaminated rate + #: Uncontaminated rates by year clean_rates = records.df.groupby("Calendar_Year__c").apply( lambda year_df: ( - 1 - - ( - year_df["recycling_tons_contaminated"].sum() - / year_df["recycling_tons_report_contamination_total"].sum() - ) + year_df["in_state_msw_recycled"].sum() / year_df["in_state_msw_received_for_recycling"].sum() * 100 ) - * 100 ) + 
+ clean_rates.replace([np.inf, -np.inf], np.nan, inplace=True) #: inf arises when a year's received total is 0 (e.g. every contamination rate is NaN, so the NaN-skipping sum is 0) clean_rates.name = "annual_recycling_uncontaminated_rate" clean_rates.index.name = "data_year" clean_rates.index = clean_rates.index.map(helpers.convert_to_int) diff --git a/src/wmrc/yearly.py b/src/wmrc/yearly.py index 109a9bc..6a68e25 100644 --- a/src/wmrc/yearly.py +++ b/src/wmrc/yearly.py @@ -177,6 +177,7 @@ def rates_per_material(year_df: pd.DataFrame, classification: str, fields: list[ .str.replace("_", " ") .str.replace(" CM", " Compostable Material") .str.replace("SW Stream", "Other Solid Waste Stream Materials") + .str.replace("Paper", "Paper and Paperboard") ) return sum_df diff --git a/tests/test_summarize.py b/tests/test_summarize.py index 0014a1e..969e68f 100644 --- a/tests/test_summarize.py +++ b/tests/test_summarize.py @@ -82,8 +82,9 @@ def test_recovery_rates_by_tonnage_happy_path(self, mocker): { "Calendar_Year__c": [2022, 2022, 2023, 2023], "Out_of_State__c": [0, 0, 0, 0], - "Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20], - "Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100], + "Municipal_Solid_Waste__c": [100, 100, 100, 100], + "Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50], + "Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40], } ) @@ -91,8 +92,33 @@ def test_recovery_rates_by_tonnage_happy_path(self, mocker): test_df = pd.Series( { - 2022: 95.0, - 2023: 85.0, + 2022: 75.0, + 2023: 50.0, }, name="annual_recycling_uncontaminated_rate", ) + test_df.index.name = "data_year" + + pd.testing.assert_series_equal(output_series, test_df) + + def test_recovery_rates_by_tonnage_replaces_inf_with_zero(self, mocker): + records = mocker.Mock() + records.df = pd.DataFrame( + { + "Calendar_Year__c": [2022, 2022, 2023, 2023], + "Out_of_State__c": [0, 0, 0, 0], + "Municipal_Solid_Waste__c": [100, 100, 100, 100], + "Annual_Recycling_Contamination_Rate__c": [np.nan, np.nan, 50, 50], 
"Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40], + } + ) + + output_series = summarize.recovery_rates_by_tonnage(records) + + test_df = pd.Series( + { + 2022: np.nan, + 2023: 50.0, }, name="annual_recycling_uncontaminated_rate", ) @@ -104,11 +130,11 @@ def test_recovery_rates_by_tonnage_uses_out_of_state_modifier(self, mocker): records = mocker.Mock() records.df = pd.DataFrame( { - "facility_name": ["foo", "bar", "foo", "bar"], "Calendar_Year__c": [2022, 2022, 2023, 2023], "Out_of_State__c": [0, 100, 0, 100], - "Annual_Recycling_Contamination_Rate__c": [10, 0, 10, 20], - "Combined_Total_of_Material_Recycled__c": [100, 100, 100, 100], + "Municipal_Solid_Waste__c": [100, 100, 100, 100], + "Annual_Recycling_Contamination_Rate__c": [50, 0, 50, 50], + "Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40], } ) @@ -116,8 +142,8 @@ def test_recovery_rates_by_tonnage_uses_out_of_state_modifier(self, mocker): test_df = pd.Series( { - 2022: 90.0, - 2023: 90.0, + 2022: 50.0, + 2023: 50.0, }, name="annual_recycling_uncontaminated_rate", )