Skip to content

Commit

Permalink
everything converted to new parameter system
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Aug 6, 2024
1 parent af99144 commit 5ad98aa
Show file tree
Hide file tree
Showing 11 changed files with 69 additions and 94 deletions.
17 changes: 6 additions & 11 deletions countess/plugins/collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,8 @@ class CollatePlugin(PandasProcessPlugin):
version = VERSION
link = "https://countess-project.github.io/CountESS/included-plugins/#collate"

parameters = {
"columns": PerColumnArrayParam(
"Columns", ChoiceParam("Role", choices=["—", "Group", "Sort (Asc)", "Sort (Desc)"])
),
"limit": IntegerParam("First N records", 0),
}
columns = PerColumnArrayParam("Columns", ChoiceParam("Role", choices=["—", "Group", "Sort (Asc)", "Sort (Desc)"]))
limit = IntegerParam("First N records", 0)

dataframes: List[pd.DataFrame]

Expand All @@ -37,22 +33,21 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger) -> Iterable:
return []

def finalize(self, logger: Logger) -> Iterable[pd.DataFrame]:
assert isinstance(self.parameters["columns"], PerColumnArrayParam)
assert self.dataframes

df = pd.concat(self.dataframes)
input_columns = get_all_columns(df).keys()
self.parameters["columns"].set_column_choices(input_columns)
column_parameters = list(zip(input_columns, self.parameters["columns"]))
self.columns.set_column_choices(input_columns)
column_parameters = list(zip(input_columns, self.columns))
group_cols = [col for col, param in column_parameters if param.value == "Group"]
sort_cols = {
col: param.value.endswith("(Asc)") for col, param in column_parameters if param.value.startswith("Sort")
}

def sort_and_limit(df: pd.DataFrame) -> pd.DataFrame:
df = df.sort_values(by=list(sort_cols.keys()), ascending=list(sort_cols.values()))
if self.parameters["limit"].value > 0:
df = df.head(self.parameters["limit"].value)
if self.limit > 0:
df = df.head(self.limit.value)
return df

try:
Expand Down
24 changes: 8 additions & 16 deletions countess/plugins/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from countess import VERSION
from countess.core.logger import Logger
from countess.core.parameters import (
ArrayParam,
BooleanParam,
DataTypeOrNoneChoiceParam,
PerColumnArrayParam,
Expand All @@ -13,28 +12,21 @@
from countess.core.plugins import PandasSimplePlugin


class _ColumnMultiParam(TabularMultiParam):
rename = StringParam("Name")
datatype = DataTypeOrNoneChoiceParam("Column Type")
index = BooleanParam("Index?")


class ColumnToolPlugin(PandasSimplePlugin):
name = "DataFrame Column Tool"
description = "Alter Columns of a DataFrame"
version = VERSION

parameters = {
"columns": PerColumnArrayParam(
"Columns",
TabularMultiParam(
"Column",
{
"rename": StringParam("Name"),
"datatype": DataTypeOrNoneChoiceParam("Column Type"),
"index": BooleanParam("Index?"),
},
),
)
}
columns = PerColumnArrayParam("Columns", _ColumnMultiParam("Column"))

def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataFrame:
assert isinstance(self.parameters["columns"], ArrayParam)
column_parameters = list(zip(self.input_columns, self.parameters["columns"]))
column_parameters = list(zip(self.input_columns, self.columns))

drop_columns = [column_name for column_name, parameter in column_parameters if parameter.datatype.is_none()]

Expand Down
29 changes: 13 additions & 16 deletions countess/plugins/correlation.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,34 +16,31 @@ class CorrelationPlugin(PandasSimplePlugin):
version = VERSION
link = "https://countess-project.github.io/CountESS/included-plugins/#correlation-tool"

parameters = {
"method": ChoiceParam("Method", choices=["pearson", "kendall", "spearman"]),
"group": ColumnOrNoneChoiceParam("Group"),
"column1": ColumnChoiceParam("Column 1"),
"column2": ColumnChoiceParam("Column 2"),
}
method = ChoiceParam("Method", choices=["pearson", "kendall", "spearman"])
group = ColumnOrNoneChoiceParam("Group")
column1 = ColumnChoiceParam("Column 1")
column2 = ColumnChoiceParam("Column 2")

columns: list[str] = []
dataframes: list[pd.DataFrame] = []

def prepare(self, sources: list[str], row_limit: Optional[int] = None):
assert isinstance(self.parameters["group"], ColumnOrNoneChoiceParam)
column1 = self.parameters["column1"].value
column2 = self.parameters["column2"].value
column1 = self.column1.value
column2 = self.column2.value
self.columns = [column1, column2]
if self.parameters["group"].is_not_none():
self.columns.append(self.parameters["group"].value)
if self.group.is_not_none():
self.columns.append(self.group.value)
self.dataframes = []

def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> None:
self.dataframes.append(dataframe[self.columns])

def finalize(self, logger: Logger) -> Iterable[pd.DataFrame]:
assert isinstance(self.parameters["group"], ColumnOrNoneChoiceParam)
column1 = self.parameters["column1"].value
column2 = self.parameters["column2"].value
groupby = None if self.parameters["group"].is_none() else self.parameters["group"].value
column1 = self.column1.value
column2 = self.column2.value
groupby = None if self.group.is_none() else self.group.value

method = self.parameters["method"].value
method = self.method.value

dataframe = pd.concat(self.dataframes)
if groupby:
Expand Down
18 changes: 7 additions & 11 deletions countess/plugins/curve_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,19 +33,15 @@ class CurveFitPlugin(PandasTransformDictToDictPlugin):

version = VERSION

parameters = {
"xaxis": ColumnGroupOrNoneChoiceParam("X Axis", None, []),
"yaxis": ColumnGroupChoiceParam("Y Axis", None, []),
"function": ChoiceParam("Function", list(FUNCTIONS.keys())[0], list(FUNCTIONS.keys())),
}
xaxis = ColumnGroupOrNoneChoiceParam("X Axis", None, [])
yaxis = ColumnGroupChoiceParam("Y Axis", None, [])
function = ChoiceParam("Function", list(FUNCTIONS.keys())[0], list(FUNCTIONS.keys()))

def process_dict(self, data: dict, logger: Logger) -> dict:
assert isinstance(self.parameters["xaxis"], ColumnGroupOrNoneChoiceParam)
assert isinstance(self.parameters["yaxis"], ColumnGroupChoiceParam)
xprefix = None
if not self.parameters["xaxis"].is_none():
xprefix = self.parameters["xaxis"].value
yprefix = self.parameters["yaxis"].value
if not self.xaxis.is_none():
xprefix = self.xaxis.value
yprefix = self.yaxis.value

tvals = {k.removeprefix(yprefix) for k in data.keys() if k.startswith(yprefix)}
if xprefix:
Expand All @@ -62,7 +58,7 @@ def process_dict(self, data: dict, logger: Logger) -> dict:
yvals.append(yval)

try:
function = FUNCTIONS[self.parameters["function"].value]
function = FUNCTIONS[self.function.value]
popt, pcov, *_ = curve_fit(function, xvals, yvals)

r = {f"popt_{n}": v for n, v in enumerate(popt)}
Expand Down
8 changes: 6 additions & 2 deletions countess/plugins/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,26 @@

OPERATORS = ["equals", "greater than", "less than", "contains", "starts with", "ends with", "matches regex"]

class _FilterColumnMultiParam(MultiParam):

class _FilterColumnMultiParam(TabularMultiParam):
column = ColumnChoiceParam("Column")
negate = BooleanParam("Negate?")
operator = ChoiceParam("Operator", OPERATORS[0], OPERATORS)
value = StringParam("Value")

class _FilterOutputMultiParam(MultiParam):

class _FilterOutputMultiParam(TabularMultiParam):
output = StringParam("Output Column")
value = StringParam("Output Value")
type = DataTypeChoiceParam("Output Type")


class FilterMultiParam(MultiParam):
columns = ArrayParam("Columns", _FilterColumnMultiParam("Column"))
combine = ChoiceParam("Combine", "All", ["All", "Any"])
outputs = ArrayParam("Outputs", _FilterOutputMultiParam("Output"))


class FilterPlugin(PandasSimplePlugin):
name = "Filter Plugin"
description = "Filter rows by simple expressions"
Expand Down
4 changes: 2 additions & 2 deletions countess/plugins/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class ColumnMultiParam(TabularMultiParam):
sum = BooleanParam("Sum")
mean = BooleanParam("Mean")


class GroupByPlugin(PandasConcatProcessPlugin):
"""Groups a Pandas Dataframe by an arbitrary column and rolls up rows"""

Expand All @@ -40,8 +41,7 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger) -> Iterable:
keep_columns = [
col_param.label
for col_param in self.columns
if any(cp.value for cp in col_param.values())
and col_param.label in data.columns
if any(cp.value for cp in col_param.values()) and col_param.label in data.columns
]
data = data[keep_columns]

Expand Down
3 changes: 1 addition & 2 deletions countess/plugins/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ class PivotPlugin(PandasProcessPlugin):
version = VERSION
link = "https://countess-project.github.io/CountESS/included-plugins/#pivot-tool"

columns = PerColumnArrayParam("Columns",
ChoiceParam("Role", "Drop", choices=["Index", "Pivot", "Expand", "Drop"]))
columns = PerColumnArrayParam("Columns", ChoiceParam("Role", "Drop", choices=["Index", "Pivot", "Expand", "Drop"]))
aggfunc = ChoiceParam("Aggregation Function", "sum", choices=["sum", "mean", "min", "max"])

input_columns: Dict[str, np.dtype] = {}
Expand Down
16 changes: 6 additions & 10 deletions countess/plugins/python.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import builtins
import math
import re
from types import CodeType, FunctionType, ModuleType, NoneType
from types import FunctionType, ModuleType, NoneType
from typing import Any

import numpy as np
Expand Down Expand Up @@ -50,18 +50,14 @@ class PythonPlugin(PandasTransformDictToDictPlugin):

version = VERSION

parameters = {
"code": TextParam("Python Code"),
"dropna": BooleanParam("Drop Null Columns?"),
}
code = TextParam("Python Code")
dropna = BooleanParam("Drop Null Columns?")

code_object = None
code_globals: dict[str, Any] = {"__builtins__": SAFE_BUILTINS, **MATH_FUNCTIONS, **RE_FUNCTIONS, **NUMPY_IMPORTS}

def process_dict(self, data: dict, logger: Logger):
assert isinstance(self.parameters["code"], TextParam)
assert isinstance(self.code_object, CodeType)

assert self.code_object is not None
try:
exec(self.code_object, self.code_globals, data) # pylint: disable=exec-used
except Exception as exc: # pylint: disable=broad-exception-caught
Expand All @@ -75,7 +71,7 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataF
b) we don't need to merge afterwards"""

# XXX cache this?
self.code_object = compile(self.parameters["code"].value, "<PythonPlugin>", mode="exec")
self.code_object = compile(self.code.value, "<PythonPlugin>", mode="exec")

dataframe = dataframe.reset_index(drop=dataframe.index.names == [None])
series = self.dataframe_to_series(dataframe, logger)
Expand All @@ -86,7 +82,7 @@ def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> pd.DataF
columns="__filter"
)

if self.parameters["dropna"].value:
if self.dropna:
dataframe.dropna(axis=1, how="all", inplace=True)

return dataframe
1 change: 0 additions & 1 deletion countess/plugins/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ def process_row(
return s[0] if s else None

def process_dataframe(self, dataframe: pd.DataFrame, logger: Logger) -> Optional[pd.DataFrame]:

variant_col = self.variant.value
replicate_col = self.replicate.value
count_cols = self.columns.get_column_names(dataframe)
Expand Down
39 changes: 18 additions & 21 deletions countess/plugins/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,42 +9,39 @@


class SequencePlugin(PandasTransformSingleToSinglePlugin):
"""Manipulate DNA valueuences"""
"""Manipulate DNA sequences"""

name = "Sequence Tool"
description = "Manipulate DNA Sequences"
version = VERSION
link = "https://countess-project.github.io/CountESS/included-plugins/#valueuence"

parameters = {
"column": ColumnChoiceParam("Input Column"),
"invert": BooleanParam("Invert", False),
"offset": IntegerParam("Offset", 0),
"start": StringParam("Start at ...", ""),
"stop": StringParam("Stop at ...", ""),
"length": IntegerParam("Max Length", 150),
"output": StringParam("Output Column", "sequence"),
}
column = ColumnChoiceParam("Input Column")
invert = BooleanParam("Invert", False)
offset = IntegerParam("Offset", 0)
start = StringParam("Start at ...", "")
stop = StringParam("Stop at ...", "")
length = IntegerParam("Max Length", 150)
output = StringParam("Output Column", "sequence")

def process_value(self, value: str, logger: Logger) -> Optional[str]:
if value is None:
return None

if self.parameters["invert"].value:
if self.invert:
value = reverse_complement(value)
if self.parameters["offset"].value:
offset = self.parameters["offset"].value
value = value[offset:]
if self.parameters["start"].value:
offset = value.find(self.parameters["start"].value)
if self.offset > 0:
value = value[int(self.offset) :]
if self.start:
offset = value.find(self.start.value)
if offset >= 0:
value = value[offset:]
else:
return None
if self.parameters["stop"].value:
offset = value.find(self.parameters["stop"].value)
if self.stop:
offset = value.find(self.stop.value)
if offset >= 0:
value = value[0 : offset + len(self.parameters["stop"].value)]
if self.parameters["length"].value:
value = value[0 : self.parameters["length"].value]
value = value[0 : offset + len(self.stop.value)]
if self.length > 0:
value = value[0 : int(self.length)]
return value
4 changes: 2 additions & 2 deletions countess/plugins/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ def process_dict(self, data, logger: Logger) -> dict:

reference = self.reference.get_value_from_dict(data)

r = {}
r : dict[str,str] = {}

if self.output:
try:
r[self.output] = find_variant_string(
r[self.output.value] = find_variant_string(
"g.", reference, sequence, int(self.max_mutations), offset=int(self.offset)
)
except ValueError:
Expand Down

0 comments on commit 5ad98aa

Please sign in to comment.