From 7484911c6478766c08d8ca9e00fe1d57e2393134 Mon Sep 17 00:00:00 2001 From: Jake Adams Date: Thu, 30 May 2024 11:34:08 -0600 Subject: [PATCH] chore: refactor for organization --- src/wmrc/helpers.py | 374 ++++++++++++++++++++++-------------------- src/wmrc/main.py | 74 ++++----- tests/test_helpers.py | 19 ++- tests/test_wmrc.py | 24 +-- 4 files changed, 252 insertions(+), 239 deletions(-) diff --git a/src/wmrc/helpers.py b/src/wmrc/helpers.py index d46ec8e..df09db9 100644 --- a/src/wmrc/helpers.py +++ b/src/wmrc/helpers.py @@ -13,6 +13,26 @@ def convert_to_int(s): return -1 +def add_bogus_geometries(input_dataframe: pd.DataFrame) -> pd.DataFrame: + """Add a bogus geometry (point in downtown Malad City, ID) to a dataframe in WKID 4326. + + Args: + input_dataframe (pd.DataFrame): Non-spatial dataframe to add geometry to + + Returns: + pd.DataFrame: Spatially-enabled dataframe version of input_dataframe with geometry added to every row + """ + + input_dataframe["x"] = 12_495_000 + input_dataframe["y"] = 5_188_000 + + spatial_dataframe = pd.DataFrame.spatial.from_xy(input_dataframe, "x", "y", sr=4326) + + spatial_dataframe.drop(columns=["x", "y"], inplace=True) + + return spatial_dataframe + + class SalesForceRecords: def __init__(self, salesforce_extractor: palletjack.extract.SalesforceRestLoader): @@ -145,205 +165,211 @@ def _build_field_mapping(self): raise ValueError(f"Missing fields: {missing_fields}") -def county_summaries(year_df: pd.DataFrame, county_fields: list[str]) -> pd.DataFrame: - """Calculate the county-wide summaries for Municipal Solid Waste (MSW) over time. +class YearlyAnalysis: + """These methods calculate metrics for a given year, usually applied to a groupby(year) object""" - Designed to be run on a yearly groupby object.
Calculates the totals based on the following formulas: - - recycling tons: county % * MSW/100 * Combined Total of Material Recycled - - composted tons: county % * MSW/100 * Total Materials sent to composting - - digested tons: county % * MSW/100 * Total Material managed by AD/C - - landfilled tons: county % * Municipal Waste In-State (in Tons) - - recycling rate: (recycling + composted + digested) / (recycling + composted + digested + landfilled) * 100 + @staticmethod + def county_summaries(year_df: pd.DataFrame, county_fields: list[str]) -> pd.DataFrame: + """Calculate the county-wide summaries for Municipal Solid Waste (MSW) over time. - County % is the amount of a given record's totals that apply to the given county. MSW/100 is a modifier to - isolate the materials reported by the facility that are MSW instead of construction debris, etc. + Designed to be run on a yearly groupby object. Calculates the totals based on the following formulas: + - recycling tons: county % * MSW/100 * Combined Total of Material Recycled + - composted tons: county % * MSW/100 * Total Materials sent to composting + - digested tons: county % * MSW/100 * Total Material managed by AD/C + - landfilled tons: county % * Municipal Waste In-State (in Tons) + - recycling rate: (recycling + composted + digested) / (recycling + composted + digested + landfilled) * 100 - Args: - year_df (pd.DataFrame): A dataframe of facility records for a single year (can be .applied to a groupby - (year) object). Columns include percentages for each county and the fields needed for the calculations - county_fields (List[str]): List county field names + County % is the amount of a given record's totals that apply to the given county. MSW/100 is a modifier to + isolate the materials reported by the facility that are MSW instead of construction debris, etc. 
- Returns: - pd.DataFrame: A dataframe of tons recycled, composted, digested, and landfilled for each county along with - overall recycling rate - """ + Args: + year_df (pd.DataFrame): A dataframe of facility records for a single year (can be .applied to a groupby + (year) object). Columns include percentages for each county and the fields needed for the calculations + county_fields (List[str]): List county field names - #: Create new dataframes that have a column for each county, one dataframe per category - recycling_df = pd.DataFrame() - composted_df = pd.DataFrame() - digested_df = pd.DataFrame() - landfilled_df = pd.DataFrame() + Returns: + pd.DataFrame: A dataframe of tons recycled, composted, digested, and landfilled for each county along with + overall recycling rate + """ - #: MSW modifier is the percentage of the facility's materials that are MSW instead of construction debris, etc. - year_df["msw_modifier"] = year_df["Municipal_Solid_Waste__c"] / 100 + #: Create new dataframes that have a column for each county, one dataframe per category + recycling_df = pd.DataFrame() + composted_df = pd.DataFrame() + digested_df = pd.DataFrame() + landfilled_df = pd.DataFrame() - #: Calculate the tons per county for each category - for county in county_fields: - recycling_df[county] = ( - year_df[county] / 100 * year_df["msw_modifier"] * year_df["Combined_Total_of_Material_Recycled__c"] - ) - composted_df[county] = ( - year_df[county] / 100 * year_df["msw_modifier"] * year_df["Total_Materials_sent_to_composting__c"] - ) - digested_df[county] = ( - year_df[county] / 100 * year_df["msw_modifier"] * year_df["Total_Material_managed_by_ADC__c"] - ) - landfilled_df[county] = year_df[county] / 100 * year_df["Municipal_Waste_In_State_in_Tons__c"] - - #: Now sum all the counties to get a single value per county per category - counties_df = pd.DataFrame() - counties_df["county_wide_msw_recycled"] = recycling_df.sum() - counties_df["county_wide_msw_composted"] = 
composted_df.sum() - counties_df["county_wide_msw_digested"] = digested_df.sum() - counties_df["county_wide_msw_landfilled"] = landfilled_df.sum() - counties_df["county_wide_msw_recycling_rate"] = ( - ( - counties_df["county_wide_msw_recycled"] - + counties_df["county_wide_msw_composted"] - + counties_df["county_wide_msw_digested"] - ) - / ( - counties_df["county_wide_msw_recycled"] - + counties_df["county_wide_msw_composted"] - + counties_df["county_wide_msw_digested"] - + counties_df["county_wide_msw_landfilled"] - ) - * 100 - ) + #: MSW modifier is the percentage of the facility's materials that are MSW instead of construction debris, etc. + year_df["msw_modifier"] = year_df["Municipal_Solid_Waste__c"] / 100 - return counties_df + #: Calculate the tons per county for each category + for county in county_fields: + recycling_df[county] = ( + year_df[county] / 100 * year_df["msw_modifier"] * year_df["Combined_Total_of_Material_Recycled__c"] + ) + composted_df[county] = ( + year_df[county] / 100 * year_df["msw_modifier"] * year_df["Total_Materials_sent_to_composting__c"] + ) + digested_df[county] = ( + year_df[county] / 100 * year_df["msw_modifier"] * year_df["Total_Material_managed_by_ADC__c"] + ) + landfilled_df[county] = year_df[county] / 100 * year_df["Municipal_Waste_In_State_in_Tons__c"] + + #: Now sum all the counties to get a single value per county per category + counties_df = pd.DataFrame() + counties_df["county_wide_msw_recycled"] = recycling_df.sum() + counties_df["county_wide_msw_composted"] = composted_df.sum() + counties_df["county_wide_msw_digested"] = digested_df.sum() + counties_df["county_wide_msw_landfilled"] = landfilled_df.sum() + counties_df["county_wide_msw_recycling_rate"] = ( + ( + counties_df["county_wide_msw_recycled"] + + counties_df["county_wide_msw_composted"] + + counties_df["county_wide_msw_digested"] + ) + / ( + counties_df["county_wide_msw_recycled"] + + counties_df["county_wide_msw_composted"] + + 
counties_df["county_wide_msw_digested"] + + counties_df["county_wide_msw_landfilled"] + ) + * 100 + ) + return counties_df -def facility_tons_diverted_from_landfills(year_df: pd.DataFrame) -> pd.DataFrame: - """Calculate the total tonnage of material diverted from landfills for each facility. + @staticmethod + def facility_tons_diverted_from_landfills(year_df: pd.DataFrame) -> pd.DataFrame: + """Calculate the total tonnage of material diverted from landfills for each facility. - Tons diverted = Combined Total of Material Recycled + Total Materials recycled + Total Materials sent to - composting + Combined Total Material for Composting +Total Material managed by AD/C + Combined Total Material - for Combustion + Total Materials combusted + Total waste tires recycled (in Tons) + Total WT for combustion (in - Tons) + Tons diverted = Combined Total of Material Recycled + Total Materials recycled + Total Materials sent to + composting + Combined Total Material for Composting +Total Material managed by AD/C + Combined Total Material + for Combustion + Total Materials combusted + Total waste tires recycled (in Tons) + Total WT for combustion (in + Tons) - Args: - year_df (pd.DataFrame): Dataframe of facility records for a single year (can be .applied to a groupby - year)). + Args: + year_df (pd.DataFrame): Dataframe of facility records for a single year (can be .applied to a groupby + year)). 
- Returns: - pd.DataFrame: Facility name, id, and total tons diverted from landfills - """ + Returns: + pd.DataFrame: Facility name, id, and total tons diverted from landfills + """ - fields = [ - "Facility_Name__c", - "facility_id", - "Combined_Total_of_Material_Recycled__c", - "Total_Materials_recycled__c", - "Total_Materials_sent_to_composting__c", - "Combined_Total_Material_for_Compostion__c", - "Total_Material_managed_by_ADC__c", - "Combined_Total_Material_for_Combustion__c", - "Total_Materials_combusted__c", - "Total_waste_tires_recycled_in_Tons__c", - "Total_WT_for_combustion_in_Tons__c", - ] - subset_df = year_df[fields].copy() - - #: Sum any duplicate records for a single facility - sum_df = subset_df.groupby(["Facility_Name__c", "facility_id"]).sum().reset_index() - - sum_df["tons_of_material_diverted_from_"] = ( - sum_df["Combined_Total_of_Material_Recycled__c"] - + sum_df["Total_Materials_recycled__c"] - + sum_df["Total_Materials_sent_to_composting__c"] - + sum_df["Combined_Total_Material_for_Compostion__c"] - + sum_df["Total_Material_managed_by_ADC__c"] - + sum_df["Combined_Total_Material_for_Combustion__c"] - + sum_df["Total_Materials_combusted__c"] - + sum_df["Total_waste_tires_recycled_in_Tons__c"] - + sum_df["Total_WT_for_combustion_in_Tons__c"] - ) - - #: Extract just the number part of the facility id, strip leading zeros - sum_df["id_"] = sum_df["facility_id"].astype(str).str[3:].str.lstrip("0") - - #: Replace 0s with None for AGOL/Arcade logic - sum_df["tons_of_material_diverted_from_"] = sum_df["tons_of_material_diverted_from_"].replace(0, None) - - return sum_df[["Facility_Name__c", "id_", "tons_of_material_diverted_from_"]] - - -def rates_per_material(year_df: pd.DataFrame, classification: str, fields: list[str], total_field: str) -> pd.DataFrame: - """Calculate recycling/composting rates for each material type for a given year. 
+ fields = [ + "Facility_Name__c", + "facility_id", + "Combined_Total_of_Material_Recycled__c", + "Total_Materials_recycled__c", + "Total_Materials_sent_to_composting__c", + "Combined_Total_Material_for_Compostion__c", + "Total_Material_managed_by_ADC__c", + "Combined_Total_Material_for_Combustion__c", + "Total_Materials_combusted__c", + "Total_waste_tires_recycled_in_Tons__c", + "Total_WT_for_combustion_in_Tons__c", + ] + subset_df = year_df[fields].copy() + + #: Sum any duplicate records for a single facility + sum_df = subset_df.groupby(["Facility_Name__c", "facility_id"]).sum().reset_index() + + sum_df["tons_of_material_diverted_from_"] = ( + sum_df["Combined_Total_of_Material_Recycled__c"] + + sum_df["Total_Materials_recycled__c"] + + sum_df["Total_Materials_sent_to_composting__c"] + + sum_df["Combined_Total_Material_for_Compostion__c"] + + sum_df["Total_Material_managed_by_ADC__c"] + + sum_df["Combined_Total_Material_for_Combustion__c"] + + sum_df["Total_Materials_combusted__c"] + + sum_df["Total_waste_tires_recycled_in_Tons__c"] + + sum_df["Total_WT_for_combustion_in_Tons__c"] + ) - Args: - year_df (pd.DataFrame): Dataframe of facility records for a single year (can be .applied to a groupby(year) object). - classification (str): Report Classification, either "Recycling" or "Composts" - fields (list[str]): List of the fields containing the material totals. - total_field (str): The field containing the total material received for the percentage calculation. 
+ #: Extract just the number part of the facility id, strip leading zeros + sum_df["id_"] = sum_df["facility_id"].astype(str).str[3:].str.lstrip("0") - Returns: - pd.DataFrame: Renamed material types, total tonnage processed, and percent processed - """ + #: Replace 0s with None for AGOL/Arcade logic + sum_df["tons_of_material_diverted_from_"] = sum_df["tons_of_material_diverted_from_"].replace(0, None) - #: Make sure the MSW percentage field is last - try: - fields.remove("Municipal_Solid_Waste__c") - except ValueError: - pass - fields.append("Municipal_Solid_Waste__c") + return sum_df[["Facility_Name__c", "id_", "tons_of_material_diverted_from_"]] - subset_df = year_df[year_df["Classifications__c"] == classification][fields] + @staticmethod + def rates_per_material( + year_df: pd.DataFrame, classification: str, fields: list[str], total_field: str + ) -> pd.DataFrame: + """Calculate recycling/composting rates for each material type for a given year. - #: Sum totals across all records taking into account MSW modifier, calculate total percentage - sum_series = pd.Series() - for col in fields[:-1]: #: We don't want to total Municipal Solid Waste, we just need for the computation - sum_series[col] = (subset_df["Municipal_Solid_Waste__c"] / 100 * subset_df[col]).sum() - sum_df = pd.DataFrame(sum_series, columns=["amount"]) - sum_df["percent"] = sum_df["amount"] / sum_df.loc[total_field, "amount"] + Args: + year_df (pd.DataFrame): Dataframe of facility records for a single year (can be .applied to a groupby(year) object). + classification (str): Report Classification, either "Recycling" or "Composts" + fields (list[str]): List of the fields containing the material totals. + total_field (str): The field containing the total material received for the percentage calculation. 
- #: Rename columns for existing AGOL layer - regex = re.compile(r"(?<=Total_)(.+)(?=_Materials_recei)|(?<=Total_)(.+)(?=_recei)") - sum_df.reset_index(names="material", inplace=True) - sum_df["material"] = ( - sum_df["material"] - .apply(lambda x: re.search(regex, x)[0] if re.search(regex, x) else x) - .str.replace("__c", "") - .str.replace("_", " ") - .str.replace(" CM", " Compostable Material") - ) + Returns: + pd.DataFrame: Renamed material types, total tonnage processed, and percent processed + """ - return sum_df + #: Make sure the MSW percentage field is last + try: + fields.remove("Municipal_Solid_Waste__c") + except ValueError: + pass + fields.append("Municipal_Solid_Waste__c") + + subset_df = year_df[year_df["Classifications__c"] == classification][fields] + + #: Sum totals across all records taking into account MSW modifier, calculate total percentage + sum_series = pd.Series() + for col in fields[:-1]: #: We don't want to total Municipal Solid Waste, we just need for the computation + sum_series[col] = (subset_df["Municipal_Solid_Waste__c"] / 100 * subset_df[col]).sum() + sum_df = pd.DataFrame(sum_series, columns=["amount"]) + sum_df["percent"] = sum_df["amount"] / sum_df.loc[total_field, "amount"] + + #: Rename columns for existing AGOL layer + regex = re.compile(r"(?<=Total_)(.+)(?=_Materials_recei)|(?<=Total_)(.+)(?=_recei)") + sum_df.reset_index(names="material", inplace=True) + sum_df["material"] = ( + sum_df["material"] + .apply(lambda x: re.search(regex, x)[0] if re.search(regex, x) else x) + .str.replace("__c", "") + .str.replace("_", " ") + .str.replace(" CM", " Compostable Material") + ) + return sum_df -def statewide_yearly_metrics(county_year_df: pd.DataFrame) -> pd.DataFrame: - """Calculate statewide yearly metrics for recycling, composting, digestion, and landfilling (RCDL), filtering out - out of state totals. 
+ @staticmethod + def statewide_yearly_metrics(county_year_df: pd.DataFrame) -> pd.DataFrame: + """Calculate statewide yearly metrics for recycling, composting, digestion, and landfilling (RCDL), filtering out + out of state totals. - Args: - county_year_df (pd.DataFrame): Dataframe of county summaries for a given year with the RCDL metrics (can be - applied to a groupby (year) object). + Args: + county_year_df (pd.DataFrame): Dataframe of county summaries for a given year with the RCDL metrics (can be + applied to a groupby (year) object). - Returns: - pd.DataFrame: Statewide yearly metrics. - """ + Returns: + pd.DataFrame: Statewide yearly metrics. + """ - in_state_only = county_year_df.drop(index="Out of State", errors="ignore") - - statewide_series = pd.Series() - statewide_series["statewide_msw_recycled"] = in_state_only["county_wide_msw_recycled"].sum() - statewide_series["statewide_msw_composted"] = in_state_only["county_wide_msw_composted"].sum() - statewide_series["statewide_msw_digested"] = in_state_only["county_wide_msw_digested"].sum() - statewide_series["statewide_msw_landfilled"] = in_state_only["county_wide_msw_landfilled"].sum() - statewide_series["statewide_msw_recycling_rate"] = ( - ( - statewide_series["statewide_msw_recycled"] - + statewide_series["statewide_msw_composted"] - + statewide_series["statewide_msw_digested"] - ) - / ( - statewide_series["statewide_msw_recycled"] - + statewide_series["statewide_msw_composted"] - + statewide_series["statewide_msw_digested"] - + statewide_series["statewide_msw_landfilled"] + in_state_only = county_year_df.drop(index="Out of State", errors="ignore") + + statewide_series = pd.Series() + statewide_series["statewide_msw_recycled"] = in_state_only["county_wide_msw_recycled"].sum() + statewide_series["statewide_msw_composted"] = in_state_only["county_wide_msw_composted"].sum() + statewide_series["statewide_msw_digested"] = in_state_only["county_wide_msw_digested"].sum() + 
statewide_series["statewide_msw_landfilled"] = in_state_only["county_wide_msw_landfilled"].sum() + statewide_series["statewide_msw_recycling_rate"] = ( + ( + statewide_series["statewide_msw_recycled"] + + statewide_series["statewide_msw_composted"] + + statewide_series["statewide_msw_digested"] + ) + / ( + statewide_series["statewide_msw_recycled"] + + statewide_series["statewide_msw_composted"] + + statewide_series["statewide_msw_digested"] + + statewide_series["statewide_msw_landfilled"] + ) + * 100 ) - * 100 - ) - return statewide_series + return statewide_series diff --git a/src/wmrc/main.py b/src/wmrc/main.py index c5476ef..a31b531 100644 --- a/src/wmrc/main.py +++ b/src/wmrc/main.py @@ -145,13 +145,13 @@ def process(self): #: Do the work - #: Load data from Salesforce and generate analyses + #: Load data from Salesforce and generate analyses using Summarize methods self.skid_logger.info("Loading records from Salesforce...") records = self._load_salesforce_data() - facility_summary_df = self._facility_summaries(records).query("data_year == @config.YEAR") - county_summary_df = self._county_summaries(records) # .query("data_year == @config.YEAR") - materials_recycled_df = self._materials_recycled(records) - materials_composted_df = self._materials_composted(records) + facility_summary_df = Summarize.facility_summaries(records).query("data_year == @config.YEAR") + county_summary_df = Summarize.county_summaries(records) # .query("data_year == @config.YEAR") + materials_recycled_df = Summarize.materials_recycled(records) + materials_composted_df = Summarize.materials_composted(records) #: Facilities on map self.skid_logger.info("Updating facility info...") @@ -163,25 +163,27 @@ def process(self): #: Materials recycled on dashboard: self.skid_logger.info("Updating materials recycled...") - materials_spatial = self._add_bogus_geometries(materials_recycled_df) + materials_spatial = helpers.add_bogus_geometries(materials_recycled_df) 
materials_spatial.rename(columns={"percent": "percent_"}, inplace=True) materials_loader = load.FeatureServiceUpdater(gis, config.MATERIALS_LAYER_ITEMID, self.tempdir_path) materials_count = materials_loader.truncate_and_load_features(materials_spatial) #: Materials composted on dashboard: self.skid_logger.info("Updating materials composted...") - composting_spatial = self._add_bogus_geometries(materials_composted_df) + composting_spatial = helpers.add_bogus_geometries(materials_composted_df) composting_spatial.rename(columns={"percent": "percent_"}, inplace=True) composting_loader = load.FeatureServiceUpdater(gis, config.COMPOSTING_LAYER_ITEMID, self.tempdir_path) composting_count = composting_loader.truncate_and_load_features(composting_spatial) #: Statewide metrics self.skid_logger.info("Updating statewide metrics...") - statewide_totals_df = county_summary_df.groupby("data_year").apply(helpers.statewide_yearly_metrics) - contamination_rates_df = self._contamination_rates_by_tonnage(records) - # contamination_rates_df = self._contamination_rates_by_facility(records) + statewide_totals_df = county_summary_df.groupby("data_year").apply( + helpers.YearlyAnalysis.statewide_yearly_metrics + ) + contamination_rates_df = Summarize.contamination_rates_by_tonnage(records) + # contamination_rates_df = Summarize.contamination_rates_by_facility(records) statewide_metrics = pd.concat([statewide_totals_df, contamination_rates_df], axis=1) - statewide_spatial = self._add_bogus_geometries(statewide_metrics) + statewide_spatial = helpers.add_bogus_geometries(statewide_metrics) statewide_loader = load.FeatureServiceUpdater(gis, config.STATEWIDE_LAYER_ITEMID, self.tempdir_path) statewide_count = statewide_loader.truncate_and_load_features(statewide_spatial) @@ -341,9 +343,6 @@ def _get_county_names(input_df, gis): return joined_points_df - #: The following methods operate on all the salesforce data, while the SalesForceRecords class operates on subsets - #: of the data, usually a
year at a time. Thus, these methods get all the records and then groupby them by year, - #: applying the SalesForceRecords methods. def _load_salesforce_data(self) -> helpers.SalesForceRecords: salesforce_credentials = extract.SalesforceApiUserCredentials( @@ -356,11 +355,17 @@ def _load_salesforce_data(self) -> helpers.SalesForceRecords: return salesforce_records + +class Summarize: + """These static methods generally apply functions from the helpers module to the records grouped by + Calendar_Year__c to create dataframes of the reports that will be used to update the AGOL feature services. + """ + @staticmethod - def _county_summaries(records: helpers.SalesForceRecords) -> pd.DataFrame: + def county_summaries(records: helpers.SalesForceRecords) -> pd.DataFrame: county_df = records.df.groupby("Calendar_Year__c").apply( - helpers.county_summaries, county_fields=records.county_fields + helpers.YearlyAnalysis.county_summaries, county_fields=records.county_fields ) county_df.index.names = ["data_year", "name"] county_df.reset_index(level="data_year", inplace=True) @@ -373,11 +378,11 @@ def _county_summaries(records: helpers.SalesForceRecords) -> pd.DataFrame: return county_df @staticmethod - def _facility_summaries(records: helpers.SalesForceRecords) -> pd.DataFrame: + def facility_summaries(records: helpers.SalesForceRecords) -> pd.DataFrame: facility_summaries = ( records.df.groupby("Calendar_Year__c") .apply( - helpers.facility_tons_diverted_from_landfills, + helpers.YearlyAnalysis.facility_tons_diverted_from_landfills, ) .droplevel(1) ) @@ -388,7 +393,7 @@ def _facility_summaries(records: helpers.SalesForceRecords) -> pd.DataFrame: return facility_summaries @staticmethod - def _materials_recycled(records: helpers.SalesForceRecords) -> pd.DataFrame: + def materials_recycled(records: helpers.SalesForceRecords) -> pd.DataFrame: recycling_fields = [ "Combined Total of Material Received", "Total Corrugated Boxes received", @@ -415,7 +420,7 @@ def
_materials_recycled(records: helpers.SalesForceRecords) -> pd.DataFrame: materials_recycled = ( records.df.groupby("Calendar_Year__c") .apply( - helpers.rates_per_material, + helpers.YearlyAnalysis.rates_per_material, classification="Recycling", fields=renamed_fields, total_field="Combined_Total_of_Material_Received__c", @@ -429,7 +434,7 @@ def _materials_recycled(records: helpers.SalesForceRecords) -> pd.DataFrame: return materials_recycled @staticmethod - def _materials_composted(records: helpers.SalesForceRecords) -> pd.DataFrame: + def materials_composted(records: helpers.SalesForceRecords) -> pd.DataFrame: composting_fields = [ "Municipal Solid Waste", "Total Material Received Compost", @@ -450,7 +455,7 @@ def _materials_composted(records: helpers.SalesForceRecords) -> pd.DataFrame: materials_composted = ( records.df.groupby("Calendar_Year__c") .apply( - helpers.rates_per_material, + helpers.YearlyAnalysis.rates_per_material, classification="Composts", fields=renamed_fields, total_field="Total_Material_Received_Compost__c", @@ -464,7 +469,7 @@ def _materials_composted(records: helpers.SalesForceRecords) -> pd.DataFrame: return materials_composted @staticmethod - def _contamination_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.DataFrame: + def contamination_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.DataFrame: records.df["in_state_modifier"] = (100 - records.df["Out_of_State__c"]) / 100 records.df["recycling_tons_contaminated"] = ( records.df["Annual_Recycling_Contamination_Rate__c"] @@ -493,7 +498,8 @@ def _contamination_rates_by_tonnage(records: helpers.SalesForceRecords) -> pd.Da return clean_rates - def _contamination_rates_by_facility(records: helpers.SalesForceRecords) -> pd.DataFrame: + @staticmethod + def contamination_rates_by_facility(records: helpers.SalesForceRecords) -> pd.DataFrame: records.df["annual_recycling_uncontaminated_rate"] = 100 - records.df["Annual_Recycling_Contamination_Rate__c"] yearly_stats = 
records.df.groupby("Calendar_Year__c").describe() @@ -501,26 +507,6 @@ def _contamination_rates_by_facility(records: helpers.SalesForceRecords) -> pd.D yearly_stats.index.name = "data_year" return yearly_stats[["count", "mean", "std"]] - @staticmethod - def _add_bogus_geometries(input_dataframe: pd.DataFrame) -> pd.DataFrame: - """Add a bogus geometry (point in downtown Malad City, ID) to a dataframe in WKID 4326. - - Args: - input_dataframe (pd.DataFrame): Non-spatial dataframe to add geometry to - - Returns: - pd.DataFrame: Spatially-enabled dataframe version of input input_dataframe with geometry added to every row - """ - - input_dataframe["x"] = 12_495_000 - input_dataframe["y"] = 5_188_000 - - spatial_dataframe = pd.DataFrame.spatial.from_xy(input_dataframe, "x", "y", sr=4326) - - spatial_dataframe.drop(columns=["x", "y"], inplace=True) - - return spatial_dataframe - def main(event, context): # pylint: disable=unused-argument """Entry point for Google Cloud Function triggered by pub/sub event diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 4771802..b749ec2 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -26,7 +26,7 @@ def test_statewide_yearly_metrics_happy_path(self): } ) - output = helpers.statewide_yearly_metrics(input_df) + output = helpers.YearlyAnalysis.statewide_yearly_metrics(input_df) pd.testing.assert_series_equal(output, expected_output) @@ -51,6 +51,21 @@ def test_statewide_yearly_metrics_removes_out_of_state_values(self): } ) - output = helpers.statewide_yearly_metrics(input_df) + output = helpers.YearlyAnalysis.statewide_yearly_metrics(input_df) pd.testing.assert_series_equal(output, expected_output) + + +class TestSmallMethods: + + def test_add_bogus_geometries_happy_path(self): + input_df = pd.DataFrame( + { + "a": [1, 2], + "b": [3, 4], + } + ) + + result_df = helpers.add_bogus_geometries(input_df) + + assert result_df.spatial.validate() diff --git a/tests/test_wmrc.py b/tests/test_wmrc.py index 
dc62bbe..b9fa0c1 100644 --- a/tests/test_wmrc.py +++ b/tests/test_wmrc.py @@ -1,5 +1,6 @@ import numpy as np import pandas as pd + from wmrc import main @@ -88,7 +89,7 @@ def test_county_summaries_happy_path(self, mocker): ) records_mock.df.groupby.return_value.apply.return_value = summaries_df - result_df = main.Skid._county_summaries(records_mock) + result_df = main.Summarize.county_summaries(records_mock) test_df = pd.DataFrame( { @@ -123,7 +124,7 @@ def test_county_summaries_replace_nan_with_0(self, mocker): ) records_mock.df.groupby.return_value.apply.return_value = summaries_df - result_df = main.Skid._county_summaries(records_mock) + result_df = main.Summarize.county_summaries(records_mock) test_df = pd.DataFrame( { @@ -149,7 +150,7 @@ def test_contamination_rates_by_tonnage_happy_path(self, mocker): } ) - output_series = main.Skid._contamination_rates_by_tonnage(records) + output_series = main.Summarize.contamination_rates_by_tonnage(records) test_df = pd.Series( { @@ -174,7 +175,7 @@ def test_contamination_rates_by_tonnage_uses_out_of_state_modifier(self, mocker) } ) - output_series = main.Skid._contamination_rates_by_tonnage(records) + output_series = main.Summarize.contamination_rates_by_tonnage(records) test_df = pd.Series( { @@ -186,18 +187,3 @@ def test_contamination_rates_by_tonnage_uses_out_of_state_modifier(self, mocker) test_df.index.name = "data_year" pd.testing.assert_series_equal(output_series, test_df) - - -class TestSmallMethods: - - def test_add_bogus_geometries_happy_path(self): - input_df = pd.DataFrame( - { - "a": [1, 2], - "b": [3, 4], - } - ) - - result_df = main.Skid._add_bogus_geometries(input_df) - - assert result_df.spatial.validate()