From d4b3954e17aebad14f0e76774e1b6945a1893146 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Mon, 4 Nov 2024 17:15:30 -0700 Subject: [PATCH] chore: replace infs with nans --- src/wmrc/summarize.py | 2 ++ tests/test_summarize.py | 25 +++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/src/wmrc/summarize.py b/src/wmrc/summarize.py index ff85ded..34e3193 100644 --- a/src/wmrc/summarize.py +++ b/src/wmrc/summarize.py @@ -2,6 +2,7 @@ Calender_Year__c to create dataframes of the reports that will be used to update the AGOL feature services. """ +import numpy as np import pandas as pd try: @@ -233,6 +234,7 @@ def recovery_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Series: ) ) + clean_rates.replace([np.inf, -np.inf], np.nan, inplace=True) #: Can arise from division by np.nan clean_rates.name = "annual_recycling_uncontaminated_rate" clean_rates.index.name = "data_year" clean_rates.index = clean_rates.index.map(helpers.convert_to_int) diff --git a/tests/test_summarize.py b/tests/test_summarize.py index 47598d7..9977915 100644 --- a/tests/test_summarize.py +++ b/tests/test_summarize.py @@ -101,6 +101,31 @@ def test_recovery_rates_by_tonnage_happy_path(self, mocker): pd.testing.assert_series_equal(output_series, test_df) + def test_recovery_rates_by_tonnage_replaces_inf_with_nan(self, mocker): + records = mocker.Mock() + records.df = pd.DataFrame( + { + "Calendar_Year__c": [2022, 2022, 2023, 2023], + "Out_of_State__c": [0, 0, 0, 0], + "Municipal_Solid_Waste__c": [100, 100, 100, 100], + "Annual_Recycling_Contamination_Rate__c": [np.nan, np.nan, 50, 50], + "Combined_Total_of_Material_Recycled__c": [50, 100, 50, 40], + } + ) + + output_series = summarize.recovery_rates_by_tonnage(records) + + test_df = pd.Series( + { + 2022: np.nan, + 2023: 50.0, + }, + name="annual_recycling_uncontaminated_rate", + ) + test_df.index.name = "data_year" + + pd.testing.assert_series_equal(output_series, test_df) + def 
test_recovery_rates_by_tonnage_uses_out_of_state_modifier(self, mocker): records = mocker.Mock() records.df = pd.DataFrame(