Skip to content

Commit

Permalink
Merge branch 'main' into new_parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Aug 23, 2024
2 parents b97ef47 + e4e2fed commit f22d75c
Show file tree
Hide file tree
Showing 7 changed files with 116 additions and 12 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# CountESS 0.0.64
# CountESS 0.0.65

This is CountESS, a modular, Python 3 reimplementation of Enrich2.

Expand Down
2 changes: 1 addition & 1 deletion countess/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""CountESS Project"""

VERSION = "0.0.64"
VERSION = "0.0.65"
2 changes: 1 addition & 1 deletion countess/gui/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def __init__(self, *a, **k) -> None:
self.rowconfigure(0, weight=1)
# self.rowconfigure(1, weight=0)

self.pbars_done : list[LabeledProgressbar] = []
self.pbars_done: list[LabeledProgressbar] = []
self.hide_event()

def logging_callback(self, record: logging.LogRecord) -> None:
Expand Down
43 changes: 35 additions & 8 deletions countess/plugins/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import math
import re
from types import FunctionType, ModuleType, NoneType
from types import BuiltinFunctionType, FunctionType, ModuleType
from typing import Any

import numpy as np
Expand All @@ -14,6 +14,9 @@

logger = logging.getLogger(__name__)

# For 3.9 compatibility
NoneType = type(None)

# XXX pretty sure this is a job for ast.parse rather than just
# running compile() and exec() but that can wait.
# Builtins are restricted but there's still plenty of things which
Expand All @@ -27,18 +30,37 @@

def _module_functions(mod: ModuleType):
"""Extracts just the public functions from a module"""
return {k: v for k, v in mod.__dict__.items() if not k.startswith("_") and type(v) is FunctionType}
return {
k: v
for k, v in mod.__dict__.items()
if not k.startswith("_") and type(v) in (BuiltinFunctionType, FunctionType)
}


# Whitelist of builtins exposed to user-supplied code as `__builtins__`.
# Container constructors and iterator helpers are intentionally left out;
# only scalar-oriented helpers remain.
# NOTE(review): `type` is still exposed, and this whitelist alone should not
# be treated as a security sandbox for untrusted code.
SAFE_BUILTINS = {
    name: getattr(builtins, name)
    for name in (
        "abs all any ascii bin bool chr "
        "float format frozenset hash hex int len max min "
        "ord range round sorted str sum type zip"
    ).split()
}
MATH_FUNCTIONS = _module_functions(math)
RE_FUNCTIONS = _module_functions(re)
# NumPy-backed extras made available to user code.
# The statistical helpers are variadic: `mean(a, b, c)` packs its positional
# arguments into a tuple and reduces over that, so they can be applied across
# several column values of a single row.
NUMPY_IMPORTS = {
    "nan": np.nan,
    "inf": np.inf,
    "isnan": np.isnan,
    "isinf": np.isinf,
    "mean": lambda *values: np.mean(values),
    "std": lambda *values: np.std(values),
    "var": lambda *values: np.var(values),
    "median": lambda *values: np.median(values),
}

CODE_GLOBALS: dict[str, Any] = {"__builtins__": SAFE_BUILTINS, **MATH_FUNCTIONS, **RE_FUNCTIONS, **NUMPY_IMPORTS}

# Sorted names of everything user code can reference. The sources are merged
# through a set because some names are supplied by more than one of them
# (e.g. `isnan` and `isinf` come from both `math` and the NumPy extras) and
# should be listed only once.
AVAILABLE_FUNCTIONS: list[str] = sorted({*SAFE_BUILTINS, *MATH_FUNCTIONS, *RE_FUNCTIONS, *NUMPY_IMPORTS})


class PythonPlugin(PandasTransformDictToDictPlugin):
Expand All @@ -48,7 +70,12 @@ class PythonPlugin(PandasTransformDictToDictPlugin):
Columns are mapped to local variables and back.
If you assign to a variable called "__filter",
only rows where that value is true will be kept.
"""
Available Functions:
""" + " ".join(
AVAILABLE_FUNCTIONS
)
link = "https://countess-project.github.io/CountESS/included-plugins/#python-code"

version = VERSION

Expand All @@ -61,7 +88,7 @@ class PythonPlugin(PandasTransformDictToDictPlugin):
def process_dict(self, data: dict):
assert self.code_object is not None
try:
exec(self.code_object, self.code_globals, data) # pylint: disable=exec-used
exec(self.code_object, CODE_GLOBALS, data) # pylint: disable=exec-used
except Exception as exc: # pylint: disable=broad-exception-caught
logger.warning("Exception", exc_info=exc)

Expand Down
2 changes: 1 addition & 1 deletion docs/_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ description: "CountESS: Documentation"
email: nick@zoic.org
baseurl: /
url: https://countess-project.github.io
version: 0.0.64
version: 0.0.65
22 changes: 22 additions & 0 deletions docs/included-plugins/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,28 @@ when pivoted with index on Variant, pivot on Replicate and expanding Count becom
| `2` | `3` | `9` |
| `3` | `0` | `6` |

### Python Code

This lets you embed simple Python expressions into your data processing.

Add one or more Python expressions to the "Python Code" field.
Each row is processed separately, with column values appearing as local variables.

Functions available:
```
abs acos acosh all any ascii asin asinh atan atan2 atanh
bin bool cbrt ceil chr comb compile copysign cos cosh
degrees dist erf erfc escape exp exp2 expm1
fabs factorial findall finditer float floor fmod
format frexp frozenset fsum fullmatch gamma gcd
hash hex hypot inf int isclose isfinite isinf
isnan isqrt lcm ldexp len lgamma log log10 log1p log2
match max mean median min modf nan nextafter ord
perm pow prod purge radians range remainder round
search sin sinh sorted split sqrt std str sub subn
sum sumprod tan tanh template trunc type ulp var zip
```

## Bioinformatics

### FASTQ Load
Expand Down
55 changes: 55 additions & 0 deletions tests/plugins/test_python.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import logging
import numpy as np
import pandas as pd
import pytest

from countess.plugins.python import PythonPlugin

# Shared input fixture for the tests in this file: five rows of integers
# in columns "a" through "e".
dfi = pd.DataFrame(
[[1, 1, 2, 7, 11], [2, 2, 3, 8, 1], [3, 3, 4, 9, 3], [4, 4, 5, 10, 4], [5, 5, 6, 12, 7]],
columns=["a", "b", "c", "d", "e"],
)

def test_python_builtins():
    """Check the statistical and math helpers exposed to user code.

    Each row of the fixture is run through a snippet that calls `mean`,
    `std`, `var`, `sqrt` and `pow`; one result per row index is then
    checked against its expected value or range.
    """
    code = """
x = mean(a,b,c,d,e)
y = std(a,b,c,d,e)
z = sqrt(pow(a,2) + pow(b,2))
v = var(a,b,c,d,e)
"""
    python_plugin = PythonPlugin()
    python_plugin.set_parameter("code", code)
    python_plugin.prepare(["test"], None)

    records = python_plugin.process_dataframe(dfi).to_records()

    # Row 0: mean of (1, 1, 2, 7, 11).
    assert records[0]["x"] == 4.4
    # Row 1: standard deviation of (2, 2, 3, 8, 1).
    assert 2.48 < records[1]["y"] < 2.49
    # Row 2: sqrt(3**2 + 3**2).
    assert 4.24 < records[2]["z"] < 4.25
    # Row 3: variance of (4, 4, 5, 10, 4).
    assert 5.43 < records[3]["v"] < 5.45


def test_python_dropna():
    """Check the `dropna` parameter against columns set to None.

    Columns that are None on every row ("a", and the new "n") must be
    absent from the output, while "d", which is None only on some rows,
    must be kept with NaN in those rows. The untouched column "b" must
    contain no NaN.
    """
    code = """
a = None
n = None
if d >= 10: d = None
"""
    python_plugin = PythonPlugin()
    python_plugin.set_parameter("code", code)
    python_plugin.set_parameter("dropna", True)
    python_plugin.prepare(["test"], None)

    result = python_plugin.process_dataframe(dfi)

    # Columns that are None on every row are absent.
    for dropped in ("a", "n"):
        assert dropped not in result.columns
    # A column that is None on only some rows is kept.
    assert "d" in result.columns

    assert np.isnan(result["d"]).any()
    assert not np.isnan(result["b"]).any()

0 comments on commit f22d75c

Please sign in to comment.