more test coverage ...

CountESS-Project · Nov 10, 2023 · 39b35ae · 39b35ae
1 parent bdd5d25
commit 39b35ae
Show file tree

Hide file tree

Showing 8 changed files with 157 additions and 5 deletions.
diff --git a/countess/plugins/hgvs_parser.py b/countess/plugins/hgvs_parser.py
@@ -88,6 +88,7 @@ def series_to_dataframe(self, series: pd.Series) -> pd.DataFrame:
  dataframe = super().series_to_dataframe(series)
 
  if self.parameters["multi"].value:
+
  if self.parameters["split"].value:
  dataframe = dataframe.explode(["var", "loc"])
  else:

diff --git a/docs/included-plugins/index.md b/docs/included-plugins/index.md
@@ -217,3 +217,22 @@ The reference sequence can either be provided directly as a configuration parame
 
 *See also: [countess-minimap2 plugin](https://github.com/CountESS-Project/countess-minimap2), a variant caller which uses 'minimap2' to find sequences within a genome.*
 
+#### Parameters
+
+Input Column
+: the input column with the variant sequence
+
+Reference
+: (optional) select column which contains the reference sequence ...
+
+Sequence
+: (optional) ... or supply a reference sequence as a value
+
+Output Column
+: Column name for HGVS string
+
+Max Mutations
+: Maximum number of mutations; if no variant with this number of mutations or fewer is found, a null value is returned for the output
+
+Drop
+: Drop rows which would have null values for output
diff --git a/script/run-tests b/script/run-tests
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-coverage run --source=countess -m pytest --doctest-modules countess/ tests/
+xvfb-run coverage run --source=countess -m pytest --doctest-modules countess/ tests/
 
 coverage report --skip-empty --sort=-cover
 

diff --git a/tests/input1.tsv b/tests/input1.tsv
@@ -0,0 +1,5 @@
+thing count
+foo 1212
+bar 232
+baz 565
+qux 999
diff --git a/tests/input1.txt b/tests/input1.txt
@@ -0,0 +1,5 @@
+thing count
+foo 1212
+bar 232
+baz 565
+qux 999
diff --git a/tests/input2.csv b/tests/input2.csv
@@ -0,0 +1,4 @@
+x,y
+1,"this line has a comma, and a double "" quote"
+2,"this line has a comma, and an escaped \" quote"
+3,this line has a comment # don't read this
diff --git a/tests/plugins/test_csv.py b/tests/plugins/test_csv.py
@@ -1,6 +1,3 @@
-import pandas as pd
-import pytest
-
 from countess.core.logger import MultiprocessLogger
 from countess.plugins.csv import LoadCsvPlugin
 
@@ -9,10 +6,53 @@
 
 def test_load_csv():
  plugin = LoadCsvPlugin()
-
  plugin.set_parameter("files.0.filename", "tests/input1.csv")
+ output_df = next(plugin.load_file(0, logger))
+ assert list(output_df.columns) == ["thing", "count"]
+ assert len(output_df) == 4
 
+def test_load_tsv():
+ plugin = LoadCsvPlugin()
+ plugin.set_parameter("files.0.filename", "tests/input1.tsv")
+ plugin.set_parameter("delimiter", "TAB")
  output_df = next(plugin.load_file(0, logger))
+ assert list(output_df.columns) == ["thing", "count"]
+ assert len(output_df) == 4
 
+def test_load_txt():
+ plugin = LoadCsvPlugin()
+ plugin.set_parameter("files.0.filename", "tests/input1.txt")
+ plugin.set_parameter("delimiter", "WHITESPACE")
+ output_df = next(plugin.load_file(0, logger))
  assert list(output_df.columns) == ["thing", "count"]
  assert len(output_df) == 4
+
+def test_load_quoting_double():
+ plugin = LoadCsvPlugin()
+ plugin.set_parameter("files.0.filename", "tests/input2.csv")
+ plugin.set_parameter("quoting", "Double-Quote")
+ output_df = next(plugin.load_file(0, logger))
+ assert output_df['y'].iloc[0] == 'this line has a comma, and a double " quote'
+
+def test_load_quoting_escaped():
+ plugin = LoadCsvPlugin()
+ plugin.set_parameter("files.0.filename", "tests/input2.csv")
+ plugin.set_parameter("quoting", "Quote with Escape")
+ output_df = next(plugin.load_file(0, logger))
+ assert output_df['y'].iloc[1] == 'this line has a comma, and an escaped " quote'
+
+def test_load_comment():
+ plugin = LoadCsvPlugin()
+ plugin.set_parameter("files.0.filename", "tests/input2.csv")
+ plugin.set_parameter("quoting", "Double-Quote")
+ plugin.set_parameter("comment", "#")
+ output_df = next(plugin.load_file(0, logger))
+ assert output_df['y'].iloc[2] == "this line has a comment "
+
+def test_filename_column():
+ plugin = LoadCsvPlugin()
+ plugin.set_parameter("files.0.filename", "tests/input1.csv")
+ plugin.set_parameter("filename_column", 'filename')
+ output_df = next(plugin.load_file(0, logger))
+ assert 'filename' in output_df.columns
+ assert output_df['filename'].iloc[1] == "./tests/input1.csv"
diff --git a/tests/plugins/test_hgvs_parser.py b/tests/plugins/test_hgvs_parser.py
@@ -0,0 +1,78 @@
+
+import pandas as pd
+
+from countess.plugins.hgvs_parser import HgvsParserPlugin
+from countess.core.logger import MultiprocessLogger
+
+logger = MultiprocessLogger()
+
+df1 = pd.DataFrame([
+ {'hgvs': 'NC_000017.11:g.[43124022G>C;43124175C>T;43124111A>G]',
+ 'guides': '43124022G>C;43124111A>G' }
+])
+
+def test_hgvs_parser():
+
+ plugin = HgvsParserPlugin()
+ plugin.set_parameter('column', 'hgvs')
+ plugin.set_parameter('guides_col', 'guides')
+
+ df = plugin.process_dataframe(df1, logger)
+
+ assert df['var_1'].iloc[0] == '43124175C>T'
+ assert df['guide_1'].iloc[0] == True
+ assert df['guide_2'].iloc[0] == True
+
+def test_hgvs_parser_guides_str():
+
+ plugin = HgvsParserPlugin()
+ plugin.set_parameter('column', 'hgvs')
+ plugin.set_parameter('guides_str', '43124022G>C;43124111A>G')
+
+ df = plugin.process_dataframe(df1, logger)
+
+ assert df['var_1'].iloc[0] == '43124175C>T'
+ assert df['guide_1'].iloc[0] == True
+ assert df['guide_2'].iloc[0] == True
+
+def test_hgvs_parser_split():
+ plugin = HgvsParserPlugin()
+ plugin.set_parameter('column', 'hgvs')
+ plugin.set_parameter('guides_col', 'guides')
+ plugin.set_parameter('split', True)
+
+ df = plugin.process_dataframe(df1, logger)
+
+ assert df['loc_1'].iloc[0] == '43124175'
+ assert df['var_1'].iloc[0] == 'C>T'
+ assert df['guide_1'].iloc[0] == True
+ assert df['guide_2'].iloc[0] == True
+
+def test_hgvs_parser_multi():
+
+ plugin = HgvsParserPlugin()
+ plugin.set_parameter('column', 'hgvs')
+ plugin.set_parameter('guides_str', '43124022G>C')
+ plugin.set_parameter('multi', True)
+ plugin.set_parameter('max_var', 2)
+
+ df = plugin.process_dataframe(df1, logger)
+
+ assert df['var'].iloc[0] == '43124175C>T'
+ assert df['var'].iloc[1] == '43124111A>G'
+
+def test_hgvs_parser_split_and_multi():
+
+ plugin = HgvsParserPlugin()
+ plugin.set_parameter('column', 'hgvs')
+ plugin.set_parameter('guides_str', '43124022G>C')
+ plugin.set_parameter('split', True)
+ plugin.set_parameter('multi', True)
+ plugin.set_parameter('max_var', 2)
+
+ df = plugin.process_dataframe(df1, logger)
+
+ assert df['var'].iloc[0] == 'C>T'
+ assert df['var'].iloc[1] == 'A>G'
+ assert df['loc'].iloc[0] == '43124175'
+ assert df['loc'].iloc[1] == '43124111'