diff --git a/countess/plugins/hgvs_parser.py b/countess/plugins/hgvs_parser.py index 07bf2ab..a5b4b4f 100644 --- a/countess/plugins/hgvs_parser.py +++ b/countess/plugins/hgvs_parser.py @@ -22,11 +22,7 @@ class HgvsParserPlugin(PandasTransformDictToDictPlugin): multi = BooleanParam("Multiple rows", False) def process_dict(self, data: dict): - try: - value = data[str(self.column)] - except KeyError: - return {} - + value = data.get(str(self.column)) if type(value) is not str: return {} @@ -39,7 +35,7 @@ def process_dict(self, data: dict): guides += self.guides_str.value.split(";") if m := re.match(r"(?:([\w.]+):)?([ncg]\.)(.*)", value): - output["reference"] = m.group(1) or '' + output["reference"] = m.group(1) or "" output["prefix"] = m.group(2) value = m.group(3) @@ -55,18 +51,18 @@ def process_dict(self, data: dict): max_variations = int(self.max_var) variations = [v for v in variations if v not in guides] - if len(variations) > max_variations: - return {} output_vars: list[Optional[str]] = [None] * max_variations output_locs: list[Optional[str]] = [None] * max_variations - for n, v in enumerate(variations): - if self.split: - if m := re.match(r"([\d_]+)(.*)", v): - output_locs[n] = m.group(1) - output_vars[n] = m.group(2) - continue - output_vars[n] = v + + if len(variations) <= max_variations: + for n, v in enumerate(variations): + if self.split: + if m := re.match(r"([\d_]+)(.*)", v): + output_locs[n] = m.group(1) + output_vars[n] = m.group(2) + continue + output_vars[n] = v if self.multi: output["var"] = output_vars diff --git a/countess/utils/parallel.py b/countess/utils/parallel.py index 368abfa..35402b0 100644 --- a/countess/utils/parallel.py +++ b/countess/utils/parallel.py @@ -1,9 +1,10 @@ import gc import logging +import threading +import time from multiprocessing import Process, Queue, Value from os import cpu_count, getpid -from queue import Empty, Full -import time +from queue import Empty from typing import Callable, Iterable try: @@ -73,9 +74,9 @@ def multiprocess_map(function, values, *args, **kwargs): output_queue: Queue = Queue(maxsize=3) def __process(): # pragma: no cover - # this is run in a pool of `nproc` processes to handle resource-intensive - # processes which don't play nicely with the GIL. - # XXX Coverage doesn't seem to understand this so we exclude it from coverage. + # this is run in a pool of `nproc` processes to handle resource-intensive + # processes which don't play nicely with the GIL. + # XXX Coverage doesn't seem to understand this so we exclude it from coverage. for data_in in input_queue: for data_out in function(data_in, *args, **(kwargs or {})): diff --git a/tests/plugins/test_hgvs_parser.py b/tests/plugins/test_hgvs_parser.py index a71311f..4df398c 100644 --- a/tests/plugins/test_hgvs_parser.py +++ b/tests/plugins/test_hgvs_parser.py @@ -84,18 +84,16 @@ def test_hgvs_parser_bad(): df = plugin.process_dataframe(df2) - print(df) - assert all(np.isnan(df["var_1"])) - #assert np.isnan(df["var_1"].iloc[0]) - #assert np.isnan(df["var_1"].iloc[1]) - #assert np.isnan(df["var_1"].iloc[2]) + assert np.isnan(df["var_1"].iloc[0]) + assert np.isnan(df["var_1"].iloc[1]) + assert np.isnan(df["var_1"].iloc[2]) def test_hgvs_parser_very_bad(): plugin = HgvsParserPlugin() plugin.set_parameter("column", "hgvs") - dfi = pd.DataFrame([{'a': 1}]) + dfi = pd.DataFrame([{"a": 1}]) dfo = plugin.process_dataframe(dfi) assert all(dfo == dfi) diff --git a/tests/test_parameters.py b/tests/test_parameters.py index 3435ff6..5eea1ff 100644 --- a/tests/test_parameters.py +++ b/tests/test_parameters.py @@ -296,5 +296,5 @@ def test_arrayparam_minmax(): assert len(ap) == 2 # FIX minimum and maximum constraints! - #ap.del_subparam(ap[1]) - #assert len(ap) == 2 + # ap.del_subparam(ap[1]) + # assert len(ap) == 2 diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 13ccb58..fccaf44 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -4,13 +4,8 @@ import pandas as pd import pytest -<<<<<<< HEAD -import countess -from countess.core.plugins import BasePlugin, FileInputPlugin, PandasProductPlugin, get_plugin_classes -======= from countess.core.parameters import ColumnChoiceParam, StringParam from countess.core.plugins import ( - BasePlugin, FileInputPlugin, PandasConcatProcessPlugin, PandasProductPlugin, @@ -19,7 +14,6 @@ PandasTransformSingleToSinglePlugin, get_plugin_classes, ) ->>>>>>> bf58ca03bb7b84f59ec878258834878f375eb369 empty_entry_points_dict = {"countess_plugins": []} @@ -122,7 +116,7 @@ class FIP(FileInputPlugin): def num_files(self): return 3 - def load_file(self, file_number, row_limit): + def load_file(self, file_number, row_limit=None): if row_limit is None: row_limit = 1000000 return [f"hello{file_number}"] * row_limit