From 58b22080904c985c5c00e75b8b96e77c5393930b Mon Sep 17 00:00:00 2001 From: Nick Moore Date: Fri, 8 Sep 2023 10:28:31 +1000 Subject: [PATCH] Fix for #25 on Regex Tool. --- countess/plugins/regex.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/countess/plugins/regex.py b/countess/plugins/regex.py index 252b20a..d04e7c1 100644 --- a/countess/plugins/regex.py +++ b/countess/plugins/regex.py @@ -54,9 +54,9 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataF assert isinstance(self.parameters["output"], ArrayParam) df = super().process_dataframe(dataframe, logger) - if self.parameters["drop_unmatch"].value: - output_names = [pp.name.value for pp in self.parameters["output"]] - df = df.dropna(subset=output_names, how="all") + #if self.parameters["drop_unmatch"].value: + # output_names = [pp.name.value for pp in self.parameters["output"]] + # df = df.dropna(subset=output_names, how="all") if self.parameters["drop_column"].value: column_name = self.parameters["column"].value @@ -87,8 +87,21 @@ def process_value(self, value: str, logger: Logger) -> Iterable: except (TypeError, ValueError) as exc: logger.exception(exc) - return [None] * self.compiled_re.groups + # If dropping unmatched values, return a simple None which will + # be filtered out in series_to_dataframe below, otherwise return + # a tuple of Nones which will fill in the unmatched row. + + if self.parameters["drop_unmatch"].value: + return None + else: + return [None] * self.compiled_re.groups + def series_to_dataframe(self, series: pd.Series) -> pd.DataFrame: + # Unmatched rows return a single None, so we can easily drop + # them out before doing further processing + if self.parameters["drop_unmatch"].value: + series.dropna(inplace=True) + return super().series_to_dataframe(series) class RegexReaderPlugin(PandasInputPlugin): name = "Regex Reader"