Skip to content

Commit

Permalink
Fix for HGVS parser and CSV indexes
Browse files: browse the repository at this point in the history
  • Loading branch information
nickzoic committed Oct 23, 2023
1 parent 0b22bb6 commit 759096c
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 17 deletions.
5 changes: 4 additions & 1 deletion countess/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,10 @@ def execute(self, logger: Logger, row_limit: Optional[int] = None):
for pn in self.parent_nodes:
for data_in in pn.result:
logger.progress(self.name, None)
self.result += list(self.plugin.process(data_in, pn.name, logger))
try:
self.result += list(self.plugin.process(data_in, pn.name, logger))
except Exception as exc: # pylint: disable=broad-exception-caught
logger.exception(exc)
logger.progress(self.name, 100)
self.result += list(self.plugin.finalize(logger))

Expand Down
6 changes: 1 addition & 5 deletions countess/plugins/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,10 +177,6 @@ def prepare(self, sources: list[str], row_limit: Optional[int] = None):
def process(self, data: pd.DataFrame, source: str, logger: Logger):
# reset indexes so we can treat all columns equally.
# if there's just a nameless index then we don't care about it, drop it.

# XXX sometimes this doesn't seem to work?
# I've set "index=True" below to emit the indexes

drop_index = data.index.name is None and data.index.names[0] is None
dataframe = flatten_columns(data.reset_index(drop=drop_index))

Expand All @@ -204,7 +200,7 @@ def process(self, data: pd.DataFrame, source: str, logger: Logger):
self.filehandle,
header=emit_header,
columns=self.csv_columns,
index=drop_index,
index=False,
sep=self.SEPARATORS[self.parameters["delimiter"].value],
quoting=bool(self.QUOTING[self.parameters["quoting"].value]),
) # type: ignore [call-overload]
Expand Down
14 changes: 7 additions & 7 deletions countess/plugins/hgvs_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
from typing import Optional

import pandas as pd

Expand Down Expand Up @@ -61,16 +62,15 @@ def process_dict(self, data: dict, logger: Logger):
if len(variations) > max_variations:
return None

output_vars = []
output_locs = []
for v in variations:
output_vars : list[Optional[str]] = [None] * max_variations
output_locs : list[Optional[str]] = [None] * max_variations
for n, v in enumerate(variations):
if self.parameters["split"].value:
if m := re.match(r"([\d_]+)(.*)", v):
output_locs.append(m.group(1))
output_vars.append(m.group(2))
output_locs[n] = m.group(1)
output_vars[n] = m.group(2)
continue
output_locs.append(None)
output_vars.append(v)
output_vars[n] = v

if self.parameters["multi"].value:
output["var"] = output_vars
Expand Down
8 changes: 4 additions & 4 deletions tests/output.csv.expected
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
foo,bar,baz,qux,number,zz
10,2,1,4,232,0.08620689655172414
11,3,2,1,565,0.0584070796460177
12,9,8,7,999,0.10810810810810811
thing,foo,bar,baz,qux,number,zz
bar,10,2,1,4,232,0.08620689655172414
baz,11,3,2,1,565,0.0584070796460177
qux,12,9,8,7,999,0.10810810810810811

0 comments on commit 759096c

Please sign in to comment.