Skip to content

Commit

Permalink
code tidying
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Oct 13, 2023
1 parent 83658f6 commit 040584d
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 41 deletions.
11 changes: 5 additions & 6 deletions countess/core/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def value(self, value):
def value(self):
self._value = None

def copy(self):
def copy(self) -> "SimpleParam":
return self.__class__(self.label, self.value, self.read_only)


Expand Down Expand Up @@ -154,7 +154,7 @@ def clean_value(self, value: Any):
x = "".join([self.clean_character(c) for c in value_str])
return x

def copy(self):
def copy(self) -> "StringCharacterSetParam":
return self.__class__(self.label, self.value, self.read_only, character_set=self.character_set)


Expand Down Expand Up @@ -221,7 +221,7 @@ def get_parameters(self, key, base_dir="."):

return [(key, relpath)]

def copy(self):
def copy(self) -> "FileParam":
return self.__class__(self.label, self.value, self.read_only, file_types=self.file_types)

def get_hash_value(self) -> str:
Expand Down Expand Up @@ -303,7 +303,7 @@ def set_choices(self, choices: Iterable[str]):
else:
self._value = self.DEFAULT_VALUE

def copy(self):
def copy(self) -> "ChoiceParam":
return self.__class__(self.label, self.value, self.choices)


Expand Down Expand Up @@ -456,7 +456,7 @@ class ArrayParam(BaseParam):
# maybe we should have a TabularParam which combines ArrayParam and
# MultiParam more directly."""

params: list[BaseParam] = []
params: list[BaseParam]

def __init__(
self,
Expand Down Expand Up @@ -556,7 +556,6 @@ def set_column_choices(self, choices):
class PerColumnArrayParam(ArrayParam):
def __init__(self, *a, **k) -> None:
super().__init__(*a, **k)
self.params_by_column_name: Mapping[str, BaseParam] = {}
self.read_only = True

def get_parameters(self, key, base_dir="."):
Expand Down
14 changes: 3 additions & 11 deletions countess/core/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from countess.core.parameters import (
ArrayParam,
BaseParam,
ColumnChoiceParam,
FileArrayParam,
FileParam,
FileSaveParam,
Expand Down Expand Up @@ -229,10 +228,9 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger) -> Iterable[p
if len(result) > 0:
yield result

except Exception as exc:
except Exception as exc: # pylint: disable=broad-exception-caught
logger.exception(exc)


def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional[pd.DataFrame]:
"""Override this to process a single dataframe"""
raise NotImplementedError(f"{self.__class__}.process_dataframe()")
Expand Down Expand Up @@ -364,12 +362,11 @@ def series_to_dataframe(self, series: pd.Series) -> pd.DataFrame:
def dataframe_to_series(self, dataframe: pd.DataFrame, logger: Logger) -> pd.Series:
raise NotImplementedError(f"{self.__class__}.dataframe_to_series()")

def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataFrame:
def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional[pd.DataFrame]:
try:
series = self.dataframe_to_series(dataframe, logger)
df2 = self.series_to_dataframe(series)
except Exception as exc:
print(f"EXCEPTION! {exc}")
except Exception as exc: # pylint: disable=broad-exception-caught
logger.exception(exc)
return None
df3 = dataframe.merge(df2, left_index=True, right_index=True)
Expand All @@ -384,10 +381,6 @@ class PandasTransformSingleToXMixin: # type: ignore [attr-defined]
"""Transformer which takes a single column, the name of which is specified
in a ColumnChoiceParam called "column" """

def __init__(self, *a, **k):
super().__init__(*a, **k)
assert isinstance(self.parameters["column"], ColumnChoiceParam)

def process_value(self, value, logger: Logger):
raise NotImplementedError(f"{self.__class__}.process_value()")

Expand Down Expand Up @@ -581,7 +574,6 @@ def load_file(self, file_number: int, logger: Logger, row_limit: Optional[int] =


class PandasInputFilesPlugin(PandasInputPlugin):

def __init__(self, *a, **k):
# Add in filenames
super().__init__(*a, **k)
Expand Down
10 changes: 5 additions & 5 deletions countess/plugins/mutagenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@


def mutagenize(
sequence: str, mutate: bool, delete: bool, del3: bool, insert: bool, ins3: bool
sequence: str, mutate: bool, delete: bool, del3: bool, insert: bool, ins3: bool
) -> Iterable[tuple[str, int, Optional[str], Optional[str]]]:
# XXX not really happy with how the args are multiplying here!
# XXX it'd be faster, but less neat, to include logic for duplicate
# removal here instead of producing duplicates and then removing them
# later.
Expand All @@ -27,20 +28,19 @@ def mutagenize(
yield sequence[0:n] + sequence[n + 3 :], n + 1, sequence[n : n + 3], None
if ins3:
for ins in product("ACGT", "ACGT", "ACGT"):
ins_str = ''.join(ins)
yield sequence[0:n] + ins_str + sequence[n:], n+1, None, ins_str
ins_str = "".join(ins)
yield sequence[0:n] + ins_str + sequence[n:], n + 1, None, ins_str

ll = len(sequence) + 1
if insert:
for b2 in "ACGT":
yield sequence + b2, ll, None, b2
if ins3:
for ins in product("ACGT", "ACGT", "ACGT"):
ins_str = ''.join(ins)
ins_str = "".join(ins)
yield sequence + ins_str, ll, None, ins_str



class MutagenizePlugin(PandasInputPlugin):
"""Mutagenize"""

Expand Down
12 changes: 7 additions & 5 deletions countess/plugins/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from countess import VERSION
from countess.core.logger import Logger
from countess.core.parameters import TextParam, PerColumnArrayParam, BooleanParam
from countess.core.parameters import BooleanParam, PerColumnArrayParam, TextParam
from countess.core.plugins import PandasTransformRowToDictPlugin

# XXX pretty sure this is a job for ast.parse rather than just
Expand Down Expand Up @@ -31,18 +31,20 @@ class PythonPlugin(PandasTransformRowToDictPlugin):
version = VERSION

parameters = {
"columns": PerColumnArrayParam("columns", BooleanParam("keep", True)),
"code": TextParam("Python Code")}
"columns": PerColumnArrayParam("columns", BooleanParam("keep", True)),
"code": TextParam("Python Code"),
}

def process_row(self, row: pd.Series, logger: Logger):
assert isinstance(self.parameters["code"], TextParam)
assert isinstance(self.parameters["columns"], PerColumnArrayParam)
code_object = compile(self.parameters["code"].value, "<PythonPlugin>", mode="exec")

row_dict = dict(row)
exec(code_object, {}, row_dict) # pylint: disable=exec-used

column_parameters = list(zip(self.input_columns, self.parameters["columns"]))
columns_to_remove = set( col for col, param in column_parameters if not param.value )
column_parameters = list(zip(self.input_columns, self.parameters["columns"].params))
columns_to_remove = set(col for col, param in column_parameters if not param.value)

return dict((k, v) for k, v in row_dict.items() if k not in columns_to_remove and type(v) in SIMPLE_TYPES)

Expand Down
4 changes: 3 additions & 1 deletion countess/plugins/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ def prepare(self, sources: list[str], row_limit: Optional[int] = None):
super().prepare(sources, row_limit)
self.compiled_re = re.compile(self.parameters["regex"].value)

def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataFrame:
def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional[pd.DataFrame]:
assert isinstance(self.parameters["output"], ArrayParam)
df = super().process_dataframe(dataframe, logger)
if df is None:
return None

if self.parameters["drop_column"].value:
column_name = self.parameters["column"].value
Expand Down
28 changes: 15 additions & 13 deletions countess/plugins/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,19 @@ def process_dict(self, data, logger: Logger) -> Optional[str]:
logger.exception(exc)
return None

def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataFrame:
def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional[pd.DataFrame]:
assert isinstance(self.parameters["reference"], ColumnOrNoneChoiceParam)
dataframe = super().process_dataframe(dataframe, logger)
if self.parameters["drop"].value:
dataframe.dropna(subset=self.parameters["output"].value, inplace=True)
if self.parameters["drop_columns"].value:
try:
dataframe.drop(columns=self.parameters["column"].value, inplace=True)
if not self.parameters["reference"].is_none():
dataframe.drop(columns=self.parameters["reference"].value, inplace=True)
except KeyError:
pass

return dataframe
df_out = super().process_dataframe(dataframe, logger)

if df_out is not None:
if self.parameters["drop"].value:
df_out.dropna(subset=self.parameters["output"].value, inplace=True)
if self.parameters["drop_columns"].value:
try:
df_out.drop(columns=self.parameters["column"].value, inplace=True)
if not self.parameters["reference"].is_none():
df_out.drop(columns=self.parameters["reference"].value, inplace=True)
except KeyError:
pass

return df_out

0 comments on commit 040584d

Please sign in to comment.