Skip to content

Commit

Permalink
more test coverage ...
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Nov 10, 2023
1 parent bdd5d25 commit 39b35ae
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 5 deletions.
1 change: 1 addition & 0 deletions countess/plugins/hgvs_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def series_to_dataframe(self, series: pd.Series) -> pd.DataFrame:
dataframe = super().series_to_dataframe(series)

if self.parameters["multi"].value:

if self.parameters["split"].value:
dataframe = dataframe.explode(["var", "loc"])
else:
Expand Down
19 changes: 19 additions & 0 deletions docs/included-plugins/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,22 @@ The reference sequence can either be provided directly as a configuration parame

*See also: [countess-minimap2 plugin](https://github.com/CountESS-Project/countess-minimap2), a variant caller which uses 'minimap2' to find sequences within a genome.*

#### Parameters

Input Column
: the input column with the variant sequence

Reference
: (optional) select column which contains the reference sequence ...

Sequence
: (optional) ... or supply a reference sequence as a value

Output Column
: Column name for HGVS string

Max Mutations
: Maximum number of mutations. If no variant with this many or fewer mutations is found, a null value is returned for the output

Drop
: Drop rows which would have null values for output
2 changes: 1 addition & 1 deletion script/run-tests
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/bin/bash

# Run pytest with doctest collection over countess/ and tests/,
# recording coverage for the countess package.
coverage run --source=countess -m pytest --doctest-modules countess/ tests/
# Same invocation wrapped in xvfb-run
# (presumably so GUI-dependent tests get a display -- TODO confirm).
xvfb-run coverage run --source=countess -m pytest --doctest-modules countess/ tests/

# Print the coverage summary, skipping empty files, ordered by the cover column.
coverage report --skip-empty --sort=-cover

Expand Down
5 changes: 5 additions & 0 deletions tests/input1.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
thing count
foo 1212
bar 232
baz 565
qux 999
5 changes: 5 additions & 0 deletions tests/input1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
thing count
foo 1212
bar 232
baz 565
qux 999
4 changes: 4 additions & 0 deletions tests/input2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
x,y
1,"this line has a comma, and a double "" quote"
2,"this line has a comma, and an escaped \" quote"
3,this line has a comment # don't read this
48 changes: 44 additions & 4 deletions tests/plugins/test_csv.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
import pandas as pd
import pytest

from countess.core.logger import MultiprocessLogger
from countess.plugins.csv import LoadCsvPlugin

Expand All @@ -9,10 +6,53 @@

def test_load_csv():
    """Load tests/input1.csv with default settings; check columns and row count."""
    loader = LoadCsvPlugin()
    loader.set_parameter("files.0.filename", "tests/input1.csv")

    # load_file returns an iterator; only its first dataframe is examined.
    frames = loader.load_file(0, logger)
    first_frame = next(frames)

    assert list(first_frame.columns) == ["thing", "count"]
    assert len(first_frame) == 4

def test_load_tsv():
    """Load tests/input1.tsv with the "TAB" delimiter; check columns and row count."""
    loader = LoadCsvPlugin()
    settings = {
        "files.0.filename": "tests/input1.tsv",
        "delimiter": "TAB",
    }
    for key, value in settings.items():
        loader.set_parameter(key, value)

    # load_file returns an iterator; only its first dataframe is examined.
    first_frame = next(loader.load_file(0, logger))

    assert list(first_frame.columns) == ["thing", "count"]
    assert len(first_frame) == 4

def test_load_txt():
    """Load tests/input1.txt with the "WHITESPACE" delimiter; check columns and row count."""
    loader = LoadCsvPlugin()
    settings = {
        "files.0.filename": "tests/input1.txt",
        "delimiter": "WHITESPACE",
    }
    for key, value in settings.items():
        loader.set_parameter(key, value)

    # load_file returns an iterator; only its first dataframe is examined.
    first_frame = next(loader.load_file(0, logger))

    assert list(first_frame.columns) == ["thing", "count"]
    assert len(first_frame) == 4

def test_load_quoting_double():
    """With "Double-Quote" quoting, a doubled quote in row 0 reads back as one quote."""
    loader = LoadCsvPlugin()
    settings = {
        "files.0.filename": "tests/input2.csv",
        "quoting": "Double-Quote",
    }
    for key, value in settings.items():
        loader.set_parameter(key, value)

    first_frame = next(loader.load_file(0, logger))
    y_values = first_frame['y']

    assert y_values.iloc[0] == 'this line has a comma, and a double " quote'

def test_load_quoting_escaped():
    """With "Quote with Escape" quoting, the backslash-escaped quote in row 1 is unescaped."""
    loader = LoadCsvPlugin()
    settings = {
        "files.0.filename": "tests/input2.csv",
        "quoting": "Quote with Escape",
    }
    for key, value in settings.items():
        loader.set_parameter(key, value)

    first_frame = next(loader.load_file(0, logger))
    y_values = first_frame['y']

    assert y_values.iloc[1] == 'this line has a comma, and an escaped " quote'

def test_load_comment():
    """With comment set to "#", row 2 keeps only the text before the "#"."""
    loader = LoadCsvPlugin()
    settings = {
        "files.0.filename": "tests/input2.csv",
        "quoting": "Double-Quote",
        "comment": "#",
    }
    for key, value in settings.items():
        loader.set_parameter(key, value)

    first_frame = next(loader.load_file(0, logger))
    y_values = first_frame['y']

    # The trailing space before the comment marker is expected to survive.
    assert y_values.iloc[2] == "this line has a comment "

def test_filename_column():
    """Setting filename_column adds that column, holding the source file's path."""
    loader = LoadCsvPlugin()
    settings = {
        "files.0.filename": "tests/input1.csv",
        "filename_column": 'filename',
    }
    for key, value in settings.items():
        loader.set_parameter(key, value)

    first_frame = next(loader.load_file(0, logger))

    assert 'filename' in first_frame.columns
    recorded_names = first_frame['filename']
    assert recorded_names.iloc[1] == "./tests/input1.csv"
78 changes: 78 additions & 0 deletions tests/plugins/test_hgvs_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@

import pandas as pd

from countess.plugins.hgvs_parser import HgvsParserPlugin
from countess.core.logger import MultiprocessLogger

# Module-level logger shared by the tests below; each one passes it to
# plugin.process_dataframe.
logger = MultiprocessLogger()

# Single-row fixture: an HGVS string with three variants, plus a 'guides'
# column naming two of them.
df1 = pd.DataFrame(
    {
        'hgvs': ['NC_000017.11:g.[43124022G>C;43124175C>T;43124111A>G]'],
        'guides': ['43124022G>C;43124111A>G'],
    }
)

def test_hgvs_parser():
    """Parse df1 with guides taken from the 'guides' column; check the first row."""
    parser = HgvsParserPlugin()
    settings = {
        'column': 'hgvs',
        'guides_col': 'guides',
    }
    for key, value in settings.items():
        parser.set_parameter(key, value)

    result = parser.process_dataframe(df1, logger)

    # Expected first-row value for each output column, checked in this order.
    expected = {
        'var_1': '43124175C>T',
        'guide_1': True,
        'guide_2': True,
    }
    for column, value in expected.items():
        assert result[column].iloc[0] == value

def test_hgvs_parser_guides_str():
    """Parse df1 with guides supplied via the 'guides_str' parameter; check the first row."""
    parser = HgvsParserPlugin()
    settings = {
        'column': 'hgvs',
        'guides_str': '43124022G>C;43124111A>G',
    }
    for key, value in settings.items():
        parser.set_parameter(key, value)

    result = parser.process_dataframe(df1, logger)

    # Expected first-row value for each output column, checked in this order.
    expected = {
        'var_1': '43124175C>T',
        'guide_1': True,
        'guide_2': True,
    }
    for column, value in expected.items():
        assert result[column].iloc[0] == value

def test_hgvs_parser_split():
    """With 'split' enabled, the location and the change land in separate columns."""
    parser = HgvsParserPlugin()
    settings = {
        'column': 'hgvs',
        'guides_col': 'guides',
        'split': True,
    }
    for key, value in settings.items():
        parser.set_parameter(key, value)

    result = parser.process_dataframe(df1, logger)

    # Expected first-row value for each output column, checked in this order.
    expected = {
        'loc_1': '43124175',
        'var_1': 'C>T',
        'guide_1': True,
        'guide_2': True,
    }
    for column, value in expected.items():
        assert result[column].iloc[0] == value

def test_hgvs_parser_multi():
    """With 'multi' enabled, the non-guide variants appear as successive 'var' rows."""
    parser = HgvsParserPlugin()
    settings = {
        'column': 'hgvs',
        'guides_str': '43124022G>C',
        'multi': True,
        'max_var': 2,
    }
    for key, value in settings.items():
        parser.set_parameter(key, value)

    result = parser.process_dataframe(df1, logger)

    # Rows 0 and 1 of 'var' are checked positionally, in order.
    expected_vars = ['43124175C>T', '43124111A>G']
    for position, variant in enumerate(expected_vars):
        assert result['var'].iloc[position] == variant

def test_hgvs_parser_split_and_multi():
    """With 'split' and 'multi' both enabled, 'var' and 'loc' hold one variant per row."""
    parser = HgvsParserPlugin()
    settings = {
        'column': 'hgvs',
        'guides_str': '43124022G>C',
        'split': True,
        'multi': True,
        'max_var': 2,
    }
    for key, value in settings.items():
        parser.set_parameter(key, value)

    result = parser.process_dataframe(df1, logger)

    # (column, row position, expected value), checked in this order.
    expectations = [
        ('var', 0, 'C>T'),
        ('var', 1, 'A>G'),
        ('loc', 0, '43124175'),
        ('loc', 1, '43124111'),
    ]
    for column, position, value in expectations:
        assert result[column].iloc[position] == value

0 comments on commit 39b35ae

Please sign in to comment.