NOTE(review): line breaks, indentation and blank context lines in this patch
were reconstructed from a whitespace-collapsed copy -- check the context lines
against the tree before applying.  The blob ids on the "index" lines are those
of the original patch and no longer match the edited docs/test content.

diff --git a/countess/plugins/python.py b/countess/plugins/python.py
index 2a2b93d..2eec940 100644
--- a/countess/plugins/python.py
+++ b/countess/plugins/python.py
@@ -70,6 +70,7 @@ class PythonPlugin(PandasTransformDictToDictPlugin):
     """ + " ".join(
         AVAILABLE_FUNCTIONS
     )
+    link = "https://countess-project.github.io/CountESS/included-plugins/#python-code"
 
     version = VERSION
 
diff --git a/docs/included-plugins/index.md b/docs/included-plugins/index.md
index e31e9c1..218c9a3 100644
--- a/docs/included-plugins/index.md
+++ b/docs/included-plugins/index.md
@@ -201,6 +201,28 @@ when pivoted with index on Variant, pivot on Replicate and expanding Count becom
 | `2` | `3` | `9` |
 | `3` | `0` | `6` |
 
+### Python Code
+
+This lets you embed simple Python expressions into your data processing.
+
+Add one or more Python expressions to the "Python Code".
+Each row is processed separately, with column values appearing as local variables.
+
+Functions available:
+```
+abs acos acosh all any ascii asin asinh atan atan2 atanh
+bin bool cbrt ceil chr comb compile copysign cos cosh
+degrees dist erf erfc escape exp exp2 expm1
+fabs factorial findall finditer float floor fmod
+format frexp frozenset fsum fullmatch gamma gcd
+hash hex hypot inf int isclose isfinite isinf
+isnan isqrt lcm ldexp len lgamma log log10 log1p log2
+match max mean median min modf nan nextafter ord
+perm pow prod purge radians range remainder round
+search sin sinh sorted split sqrt std str sub subn
+sum sumprod tan tanh template trunc type ulp var zip
+```
+
 ## Bioinformatics
 
 ### FASTQ Load
diff --git a/tests/plugins/test_python.py b/tests/plugins/test_python.py
new file mode 100644
index 0000000..5701f10
--- /dev/null
+++ b/tests/plugins/test_python.py
@@ -0,0 +1,68 @@
+"""Tests for the Python Code plugin (countess.plugins.python.PythonPlugin)."""
+
+import numpy as np
+import pandas as pd
+import pytest
+
+from countess.core.logger import ConsoleLogger
+from countess.plugins.python import PythonPlugin
+
+logger = ConsoleLogger()
+
+# Shared five-row input.  Each test hands the plugin a copy, in case the
+# plugin modifies its input, so one test cannot leak changes into another.
+dfi = pd.DataFrame(
+    [
+        [1, 1, 2, 7, 11],
+        [2, 2, 3, 8, 1],
+        [3, 3, 4, 9, 3],
+        [4, 4, 5, 10, 4],
+        [5, 5, 6, 12, 7],
+    ],
+    columns=["a", "b", "c", "d", "e"],
+)
+
+
+def test_python_builtins():
+    """Row-wise mean/std/sqrt/pow/var give the expected values."""
+    plugin = PythonPlugin()
+    plugin.set_parameter("code", """
+x = mean(a,b,c,d,e)
+y = std(a,b,c,d,e)
+z = sqrt(pow(a,2) + pow(b,2))
+v = var(a,b,c,d,e)
+    """)
+
+    plugin.prepare(["test"], None)
+    dfo = plugin.process_dataframe(dfi.copy(), logger)
+    output = dfo.to_records()
+
+    # Row 0 is (1, 1, 2, 7, 11), so the mean is 22 / 5.  Compare with a
+    # tolerance rather than exact float equality.
+    assert output[0]["x"] == pytest.approx(4.4)
+    assert 2.48 < output[1]["y"] < 2.49
+    assert 4.24 < output[2]["z"] < 4.25
+    assert 5.43 < output[3]["v"] < 5.45
+
+
+def test_python_dropna():
+    """With dropna set, all-None columns vanish and partly-None ones stay."""
+    plugin = PythonPlugin()
+    plugin.set_parameter("code", """
+a = None
+n = None
+if d >= 10: d = None
+    """)
+    plugin.set_parameter("dropna", True)
+
+    plugin.prepare(["test"], None)
+    dfo = plugin.process_dataframe(dfi.copy(), logger)
+
+    # "a" and "n" are set to None in every row, so neither should be output.
+    assert "a" not in dfo.columns
+    assert "n" not in dfo.columns
+    # "d" is only blanked where d >= 10, so the column survives with gaps.
+    assert "d" in dfo.columns
+
+    assert np.isnan(dfo["d"]).any()
+    assert not np.isnan(dfo["b"]).any()