Skip to content

Commit

Permalink
let hgvs parser make multiple rows of output
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Oct 17, 2023
1 parent e5be6fc commit 5c41564
Showing 1 changed file with 33 additions and 6 deletions.
39 changes: 33 additions & 6 deletions countess/plugins/hgvs_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import re

import pandas as pd

from countess import VERSION
from countess.core.logger import Logger
from countess.core.parameters import BooleanParam, ColumnChoiceParam, ColumnOrNoneChoiceParam, IntegerParam, StringParam
Expand All @@ -18,6 +20,7 @@ class HgvsParserPlugin(PandasTransformDictToDictPlugin):
"guides_str": StringParam("Guide(s)"),
"max_var": IntegerParam("Maximum Variations", 1),
"split": BooleanParam("Split Output", False),
"multi": BooleanParam("Multiple rows", False),
}

def process_dict(self, data: dict, logger: Logger):
Expand Down Expand Up @@ -58,12 +61,36 @@ def process_dict(self, data: dict, logger: Logger):
if len(variations) > max_variations:
return None

for n, v in enumerate(variations, 1):
if self.parameters["multi"].value:
output["var"] = []
if self.parameters["split"].value:
if m := re.match(r"([\d_]+)(.*)", v):
output[f"loc_{n}"] = m.group(1)
output[f"var_{n}"] = m.group(2)
continue
output[f"var_{n}"] = v
output["loc"] = []
for v in variations:
if self.parameters["split"].value:
if m := re.match(r"([\d_]+)(.*)", v):
output["loc"].append(m.group(1))
output["var"].append(m.group(2))
continue
output["var"].append(v)
else:
for n, v in enumerate(variations, 1):
if self.parameters["split"].value:
if m := re.match(r"([\d_]+)(.*)", v):
output[f"loc_{n}"] = m.group(1)
output[f"var_{n}"] = m.group(2)
continue
output[f"var_{n}"] = v

return output

def series_to_dataframe(self, series: pd.Series) -> pd.DataFrame:
    """Convert the processed series to a dataframe, one row per variation if requested.

    The parent class builds the dataframe. When the "multi" parameter is
    enabled, the "var" column (and the "loc" column too when "split" is
    enabled) is exploded so each list element gets its own row.

    Args:
        series: series handed on to the parent ``series_to_dataframe``.

    Returns:
        The parent's dataframe, exploded on the relevant columns when
        "multi" is set; otherwise returned as-is.
    """
    frame = super().series_to_dataframe(series)

    # Single-row mode: nothing to expand.
    if not self.parameters["multi"].value:
        return frame

    # "loc" is only exploded alongside "var" when split output is enabled;
    # exploding both in one call keeps the two columns aligned.
    list_columns = ["var", "loc"] if self.parameters["split"].value else ["var"]
    return frame.explode(list_columns)

0 comments on commit 5c41564

Please sign in to comment.