diff --git a/countess/plugins/mutagenize.py b/countess/plugins/mutagenize.py index 12b9421..6f689b0 100644 --- a/countess/plugins/mutagenize.py +++ b/countess/plugins/mutagenize.py @@ -24,7 +24,7 @@ def mutagenize( yield sequence[0:n] + b2 + sequence[n:], n + 1, None, b2 if delete: yield sequence[0:n] + sequence[n + 1 :], n + 1, b1, None - if del3: + if del3 and n+3 <= len(sequence): yield sequence[0:n] + sequence[n + 3 :], n + 1, sequence[n : n + 3], None if ins3: for ins in product("ACGT", "ACGT", "ACGT"): diff --git a/countess/plugins/variant.py b/countess/plugins/variant.py index d352205..5a6d09d 100644 --- a/countess/plugins/variant.py +++ b/countess/plugins/variant.py @@ -21,7 +21,6 @@ class VariantPlugin(PandasTransformDictToSinglePlugin): "column": ColumnChoiceParam("Input Column", "sequence"), "reference": ColumnOrNoneChoiceParam("Reference Column"), "sequence": StringParam("*OR* Reference Sequence"), - "auto": BooleanParam("Automatic Reference Sequence?", False), "output": StringParam("Output Column", "variant"), "max_mutations": IntegerParam("Max Mutations", 10), "drop": BooleanParam("Drop unidentified variants", False), diff --git a/tests/plugins/test_csv.py b/tests/plugins/test_csv.py new file mode 100644 index 0000000..d6d674b --- /dev/null +++ b/tests/plugins/test_csv.py @@ -0,0 +1,20 @@ +import pytest + +import pandas as pd + +from countess.plugins.csv import LoadCsvPlugin +from countess.core.logger import MultiprocessLogger + +logger = MultiprocessLogger() + +def test_load_csv(): + + plugin = LoadCsvPlugin() + + plugin.set_parameter('files.0.filename', 'tests/input1.csv') + + + output_df = next(plugin.load_file(0, logger)) + + assert list(output_df.columns) == ['thing', 'count'] + assert len(output_df) == 4 diff --git a/tests/plugins/test_mutagenize.py b/tests/plugins/test_mutagenize.py new file mode 100644 index 0000000..e3acc7d --- /dev/null +++ b/tests/plugins/test_mutagenize.py @@ -0,0 +1,122 @@ +import pytest + +import pandas as pd + +from countess.plugins.mutagenize import MutagenizePlugin +from countess.core.logger import MultiprocessLogger + +logger = MultiprocessLogger() + +def test_mutagenize_mutate(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', True) + + output_df = next(plugin.load_file(0, logger)) + output = list(output_df['sequence']) + + assert len(output) == 21 + + +def test_mutagenize_insert(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('insert', True) + + output_df = next(plugin.load_file(0, logger)) + output = list(output_df['sequence']) + + assert len(output) == 32 + + +def test_mutagenize_insert3(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('ins3', True) + + output_df = next(plugin.load_file(0, logger)) + output = list(output_df['sequence']) + + assert len(output) == 512 + +def test_mutagenize_insert_dedup(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('insert', True) + plugin.set_parameter('remove', True) + + output_df = next(plugin.load_file(0, logger)) + + assert len(output_df) == 25 + + +def test_mutagenize_insert3_dedup(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('ins3', True) + plugin.set_parameter('remove', True) + + output_df = next(plugin.load_file(0, logger)) + + assert len(output_df) == 400 + +def test_mutagenize_delete(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('delete', True) + + output_df = next(plugin.load_file(0, logger)) + output = list(output_df['sequence']) + + assert len(output) == 7 + + +def test_mutagenize_del3(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('del3', True) + + output_df = next(plugin.load_file(0, logger)) + output = list(output_df['sequence']) + + assert len(output) == 5 + +def test_mutagenize_delete_dedup(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('delete', True) + plugin.set_parameter('remove', True) + + output_df = next(plugin.load_file(0, logger)) + + assert len(output_df) == 6 + + +def test_mutagenize_del3_dedup(): + + plugin = MutagenizePlugin() + plugin.set_parameter('sequence', 'GATTACA') + plugin.set_parameter('mutate', False) + plugin.set_parameter('del3', True) + plugin.set_parameter('remove', True) + + output_df = next(plugin.load_file(0, logger)) + + assert len(output_df) == 4 + + diff --git a/tests/plugins/test_variant.py b/tests/plugins/test_variant.py new file mode 100644 index 0000000..fe6b973 --- /dev/null +++ b/tests/plugins/test_variant.py @@ -0,0 +1,29 @@ +import pytest + +import pandas as pd + +from countess.plugins.variant import VariantPlugin +from countess.core.logger import MultiprocessLogger + +logger = MultiprocessLogger() + +def test_variant_ref_column(): + + input_df = pd.DataFrame([ + { 'ref': 'TACACACAG', 'seq': 'TACAGACAG' }, + { 'ref': 'ATGGTTGGTTC', 'seq': "ATGGTTGGTGGTTCG" } + ]) + + plugin = VariantPlugin() + plugin.set_parameter('column', 'seq') + plugin.set_parameter('reference', 'ref') + plugin.set_parameter('output', 'out') + + plugin.prepare(['test'], None) + + output_df = plugin.process_dataframe(input_df, logger) + + output = output_df.to_records() + + assert output[0]['out'] == 'g.5C>G' + assert output[1]['out'] == 'g.[7_9dup;11_12insG]'