Skip to content

Commit

Permalink
Fix for #25 on Regex Tool.
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Sep 8, 2023
1 parent f14d7ac commit 58b2208
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions countess/plugins/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataF
assert isinstance(self.parameters["output"], ArrayParam)
df = super().process_dataframe(dataframe, logger)

if self.parameters["drop_unmatch"].value:
output_names = [pp.name.value for pp in self.parameters["output"]]
df = df.dropna(subset=output_names, how="all")
#if self.parameters["drop_unmatch"].value:
# output_names = [pp.name.value for pp in self.parameters["output"]]
# df = df.dropna(subset=output_names, how="all")

if self.parameters["drop_column"].value:
column_name = self.parameters["column"].value
Expand Down Expand Up @@ -87,8 +87,21 @@ def process_value(self, value: str, logger: Logger) -> Iterable:
except (TypeError, ValueError) as exc:
logger.exception(exc)

return [None] * self.compiled_re.groups
# If dropping unmatched values, return a single None which will
# be filtered out in series_to_dataframe below, otherwise return
# a list of Nones which will fill in the unmatched row.

if self.parameters["drop_unmatch"].value:
return None
else:
return [None] * self.compiled_re.groups

def series_to_dataframe(self, series: pd.Series) -> pd.DataFrame:
    """Convert the per-row results into a dataframe, optionally dropping nulls.

    When the "drop_unmatch" parameter is set, null entries of ``series``
    are removed before the series is handed to the parent class's
    ``series_to_dataframe``. In that mode ``process_value`` returns a
    single ``None`` for a value that did not match, so those rows are the
    ones removed here.

    Args:
        series: the per-row results to convert. It is not modified.

    Returns:
        Whatever the parent class's ``series_to_dataframe`` returns for the
        (possibly filtered) series.
    """
    if self.parameters["drop_unmatch"].value:
        # Rebind to a filtered copy rather than using ``inplace=True`` so
        # that the caller's Series object is left untouched.
        series = series.dropna()
    return super().series_to_dataframe(series)

class RegexReaderPlugin(PandasInputPlugin):
name = "Regex Reader"
Expand Down

0 comments on commit 58b2208

Please sign in to comment.