diff --git a/countess/core/logger.py b/countess/core/logger.py index 713309a..d7dc7c6 100644 --- a/countess/core/logger.py +++ b/countess/core/logger.py @@ -35,7 +35,9 @@ def error(self, message: str, detail: Optional[str] = None): def exception(self, exception: Exception): # Slightly odd calling to maintain compatibility with 3.9 and 3.10 + # XXX format more nicely message = traceback.format_exception(None, value=exception, tb=None) + message += "\n\n" + "".join(traceback.format_tb(exception.__traceback__)) self.error(str(exception), detail="".join(message)) def clear(self): diff --git a/countess/core/plugins.py b/countess/core/plugins.py index bb54d0a..6a81906 100644 --- a/countess/core/plugins.py +++ b/countess/core/plugins.py @@ -413,12 +413,15 @@ class PandasTransformDictToXMixin: """Transformer which takes a row as a dictionary""" def dataframe_to_series(self, dataframe: pd.DataFrame, logger: Logger) -> pd.Series: + # XXX there is a bug in Pandas 2.1.x which prevents + # args and kwargs getting passed through when raw=True + # this seems to be fixed in Pandas 2.2.0.dev so + # hopefully this lambda can be removed some day. + # https://github.com/pandas-dev/pandas/issues/55009 return dataframe.apply( - self.process_raw, + lambda x: self.process_raw(x, list(dataframe.columns), logger), axis=1, raw=True, - columns=list(dataframe.columns), - logger=logger, ) def process_dict(self, data, logger: Logger): diff --git a/countess/plugins/python.py b/countess/plugins/python.py index 4d34094..6e5b389 100644 --- a/countess/plugins/python.py +++ b/countess/plugins/python.py @@ -1,3 +1,5 @@ +from types import CodeType + import pandas as pd from countess import VERSION @@ -18,7 +20,6 @@ # PandasTransformDictToDictPlugin # which is a bit more efficient. - class PythonPlugin(PandasTransformRowToDictPlugin): name = "Python Code" description = "Apply python code to each row." @@ -35,13 +36,18 @@ class PythonPlugin(PandasTransformRowToDictPlugin): "code": TextParam("Python Code"), } + code_object = None + def process_row(self, row: pd.Series, logger: Logger): assert isinstance(self.parameters["code"], TextParam) assert isinstance(self.parameters["columns"], PerColumnArrayParam) - code_object = compile(self.parameters["code"].value, "", mode="exec") + assert isinstance(self.code_object, CodeType) row_dict = dict(row) - exec(code_object, {}, row_dict) # pylint: disable=exec-used + try: + exec(self.code_object, {}, row_dict) # pylint: disable=exec-used + except Exception as exc: # pylint: disable=broad-exception-caught + logger.exception(exc) column_parameters = list(zip(self.input_columns, self.parameters["columns"].params)) columns_to_remove = set(col for col, param in column_parameters if not param.value) @@ -53,6 +59,9 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataF the indexes so we can use their values easily and b) we don't need to merge afterwards""" + # XXX cache this? + self.code_object = compile(self.parameters["code"].value, "", mode="exec") + dataframe = dataframe.reset_index(drop=False) series = self.dataframe_to_series(dataframe, logger) dataframe = self.series_to_dataframe(series)