Skip to content

Commit

Permalink
fixups
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Aug 30, 2024
1 parent 4e8fc23 commit a5bcf31
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 35 deletions.
26 changes: 11 additions & 15 deletions countess/plugins/hgvs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@ class HgvsParserPlugin(PandasTransformDictToDictPlugin):
multi = BooleanParam("Multiple rows", False)

def process_dict(self, data: dict):
try:
value = data[str(self.column)]
except KeyError:
return {}

value = data.get(str(self.column))
if type(value) is not str:
return {}

Expand All @@ -39,7 +35,7 @@ def process_dict(self, data: dict):
guides += self.guides_str.value.split(";")

if m := re.match(r"(?:([\w.]+):)?([ncg]\.)(.*)", value):
output["reference"] = m.group(1) or ''
output["reference"] = m.group(1) or ""
output["prefix"] = m.group(2)
value = m.group(3)

Expand All @@ -55,18 +51,18 @@ def process_dict(self, data: dict):

max_variations = int(self.max_var)
variations = [v for v in variations if v not in guides]
if len(variations) > max_variations:
return {}

output_vars: list[Optional[str]] = [None] * max_variations
output_locs: list[Optional[str]] = [None] * max_variations
for n, v in enumerate(variations):
if self.split:
if m := re.match(r"([\d_]+)(.*)", v):
output_locs[n] = m.group(1)
output_vars[n] = m.group(2)
continue
output_vars[n] = v

if len(variations) <= max_variations:
for n, v in enumerate(variations):
if self.split:
if m := re.match(r"([\d_]+)(.*)", v):
output_locs[n] = m.group(1)
output_vars[n] = m.group(2)
continue
output_vars[n] = v

if self.multi:
output["var"] = output_vars
Expand Down
11 changes: 6 additions & 5 deletions countess/utils/parallel.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import gc
import logging
import threading
import time
from multiprocessing import Process, Queue, Value
from os import cpu_count, getpid
from queue import Empty, Full
import time
from queue import Empty
from typing import Callable, Iterable

try:
Expand Down Expand Up @@ -73,9 +74,9 @@ def multiprocess_map(function, values, *args, **kwargs):
output_queue: Queue = Queue(maxsize=3)

def __process(): # pragma: no cover
# this is run in a pool of `nproc` processes to handle resource-intensive
# processes which don't play nicely with the GIL.
# XXX Coverage doesn't seem to understand this so we exclude it from coverage.
# this is run in a pool of `nproc` processes to handle resource-intensive
# processes which don't play nicely with the GIL.
# XXX Coverage doesn't seem to understand this so we exclude it from coverage.

for data_in in input_queue:
for data_out in function(data_in, *args, **(kwargs or {})):
Expand Down
10 changes: 4 additions & 6 deletions tests/plugins/test_hgvs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,16 @@ def test_hgvs_parser_bad():

df = plugin.process_dataframe(df2)

print(df)
assert all(np.isnan(df["var_1"]))
#assert np.isnan(df["var_1"].iloc[0])
#assert np.isnan(df["var_1"].iloc[1])
#assert np.isnan(df["var_1"].iloc[2])
assert np.isnan(df["var_1"].iloc[0])
assert np.isnan(df["var_1"].iloc[1])
assert np.isnan(df["var_1"].iloc[2])


def test_hgvs_parser_very_bad():
# Runs the plugin, configured to read the "hgvs" column, over a frame
# whose only column is "a", then compares the output frame to the input.
plugin = HgvsParserPlugin()
plugin.set_parameter("column", "hgvs")

dfi = pd.DataFrame([{'a': 1}])
dfi = pd.DataFrame([{"a": 1}])
dfo = plugin.process_dataframe(dfi)

# NOTE(review): `dfo == dfi` is an element-wise comparison that yields a
# DataFrame, and the built-in all() iterates a DataFrame's column labels,
# not its cells. As written this passes whenever the two frames are
# identically labelled and the labels are truthy, whatever the values.
# Consider `dfo.equals(dfi)` or `pd.testing.assert_frame_equal(dfo, dfi)`
# -- TODO confirm the plugin really returns the input unchanged first.
assert all(dfo == dfi)
4 changes: 2 additions & 2 deletions tests/test_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,5 +296,5 @@ def test_arrayparam_minmax():
assert len(ap) == 2

# FIX minimum and maximum constraints!
#ap.del_subparam(ap[1])
#assert len(ap) == 2
# ap.del_subparam(ap[1])
# assert len(ap) == 2
8 changes: 1 addition & 7 deletions tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,8 @@
import pandas as pd
import pytest

<<<<<<< HEAD
import countess
from countess.core.plugins import BasePlugin, FileInputPlugin, PandasProductPlugin, get_plugin_classes
=======
from countess.core.parameters import ColumnChoiceParam, StringParam
from countess.core.plugins import (
BasePlugin,
FileInputPlugin,
PandasConcatProcessPlugin,
PandasProductPlugin,
Expand All @@ -19,7 +14,6 @@
PandasTransformSingleToSinglePlugin,
get_plugin_classes,
)
>>>>>>> bf58ca03bb7b84f59ec878258834878f375eb369

empty_entry_points_dict = {"countess_plugins": []}

Expand Down Expand Up @@ -122,7 +116,7 @@ class FIP(FileInputPlugin):
def num_files(self):
return 3

def load_file(self, file_number, row_limit):
def load_file(self, file_number, row_limit=None):
if row_limit is None:
row_limit = 1000000
return [f"hello{file_number}"] * row_limit
Expand Down

0 comments on commit a5bcf31

Please sign in to comment.