Skip to content

Commit

Permalink
fix issue with pandas dataframe apply raw=True
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Oct 16, 2023
1 parent 68052c0 commit 9cfe6d9
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
2 changes: 2 additions & 0 deletions countess/core/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def error(self, message: str, detail: Optional[str] = None):

def exception(self, exception: Exception):
    """Report *exception* through ``self.error``.

    The short message is ``str(exception)``.  The detail text is the
    formatted exception (type and message, produced without a traceback)
    followed by a blank line and the formatted frames taken from
    ``exception.__traceback__``.
    """
    # Slightly odd calling convention to maintain compatibility with
    # Python 3.9 and 3.10: the first positional argument is ignored and
    # the exception is passed as ``value``.
    # XXX format more nicely
    summary = "".join(traceback.format_exception(None, value=exception, tb=None))
    # format_exception() returns a list of strings, so build the detail
    # text from joined strings rather than extending that list with `+=`
    # (which would append the extra text one character at a time).
    frames = "".join(traceback.format_tb(exception.__traceback__))
    self.error(str(exception), detail=summary + "\n\n" + frames)

def clear(self):
Expand Down
9 changes: 6 additions & 3 deletions countess/core/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,12 +413,15 @@ class PandasTransformDictToXMixin:
"""Transformer which takes a row as a dictionary"""

def dataframe_to_series(self, dataframe: pd.DataFrame, logger: "Logger") -> pd.Series:
    """Apply ``self.process_raw`` to every row of *dataframe*.

    Each row is handed to ``process_raw`` as a raw array (``raw=True``)
    together with the list of column names and the logger.  Returns
    whatever ``DataFrame.apply`` produces for ``axis=1``.
    """
    # Build the column list once instead of once per row.
    columns = list(dataframe.columns)

    # XXX there is a bug in Pandas 2.1.x which prevents
    # args and kwargs getting passed through when raw=True.
    # This seems to be fixed in Pandas 2.2.0.dev so
    # hopefully this lambda can be removed some day.
    # https://github.com/pandas-dev/pandas/issues/55009
    return dataframe.apply(
        lambda row: self.process_raw(row, columns, logger),
        axis=1,
        raw=True,
    )

def process_dict(self, data, logger: Logger):
Expand Down
15 changes: 12 additions & 3 deletions countess/plugins/python.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from types import CodeType

import pandas as pd

from countess import VERSION
Expand All @@ -18,7 +20,6 @@
# PandasTransformDictToDictPlugin
# which is a bit more efficient.


class PythonPlugin(PandasTransformRowToDictPlugin):
name = "Python Code"
description = "Apply python code to each row."
Expand All @@ -35,13 +36,18 @@ class PythonPlugin(PandasTransformRowToDictPlugin):
"code": TextParam("Python Code"),
}

code_object = None

def process_row(self, row: pd.Series, logger: Logger):
assert isinstance(self.parameters["code"], TextParam)
assert isinstance(self.parameters["columns"], PerColumnArrayParam)
code_object = compile(self.parameters["code"].value, "<PythonPlugin>", mode="exec")
assert isinstance(self.code_object, CodeType)

row_dict = dict(row)
exec(code_object, {}, row_dict) # pylint: disable=exec-used
try:
exec(self.code_object, {}, row_dict) # pylint: disable=exec-used
except Exception as exc: # pylint: disable=broad-exception-caught
logger.exception(exc)

column_parameters = list(zip(self.input_columns, self.parameters["columns"].params))
columns_to_remove = set(col for col, param in column_parameters if not param.value)
Expand All @@ -53,6 +59,9 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataF
the indexes so we can use their values easily and
b) we don't need to merge afterwards"""

# XXX cache this?
self.code_object = compile(self.parameters["code"].value, "<PythonPlugin>", mode="exec")

dataframe = dataframe.reset_index(drop=False)
series = self.dataframe_to_series(dataframe, logger)
dataframe = self.series_to_dataframe(series)
Expand Down

0 comments on commit 9cfe6d9

Please sign in to comment.