diff --git a/CHANGES.rst b/CHANGES.rst index 514c7700..4ef73761 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,13 +8,19 @@ development version * Added a ``--max-average-error-rate``/``--max-aer`` option to add a filter that checks if the number of expected errors divided by read length is above a certain threshold. The expected errors are calculated the same as in - ``--max-expected-errors`` and dividing by read length helps for reads that + ``--max-expected-errors``, and dividing by read length helps for reads that have varying lengths. * :issue:`696`: Added a histogram of the lengths of removed poly-A tails to the report. * :issue:`696`: For paired-end data, ``--poly-a`` was changed to trim poly-T - "heads" on R2. + "heads" on R2. This is still experimental, as it is unclear whether that is + the desired behavior. Please give feedback! * A poly-A tail is only removed if it is at least three nucleotides long. +* :issue:`734`: Fixed misassignments during demultiplexing that would sometimes + happen when there are collisions between adapter sequences + (when the warning "sequence ... cannot be assigned uniquely" was printed). + Previously, sequences could incorrectly be assigned to an adapter that is not + actually the best match. 
v4.4 (2023-04-28) ----------------- diff --git a/src/cutadapt/adapters.py b/src/cutadapt/adapters.py index 82a57c61..55b4b824 100644 --- a/src/cutadapt/adapters.py +++ b/src/cutadapt/adapters.py @@ -1309,8 +1309,7 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndex"]: if other_matches == matches and not has_warned: self._warn_similar(adapter, other_adapter, k, s, matches) has_warned = True - else: - index[s] = (adapter, errors, matches) + index[s] = (adapter, errors, matches) lengths.add(len(s)) else: n = len(sequence) @@ -1326,8 +1325,7 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndex"]: adapter, other_adapter, k, s, matches ) has_warned = True - else: - index[s] = (adapter, errors, matches) + index[s] = (adapter, errors, matches) lengths.add(n) elapsed = time.time() - start_time logger.info( diff --git a/tests/test_adapters.py b/tests/test_adapters.py index 90b3076a..e9252e83 100644 --- a/tests/test_adapters.py +++ b/tests/test_adapters.py @@ -559,6 +559,18 @@ def test_indexed_prefix_adapters_with_n_wildcard(): assert result.score == 6 +@pytest.mark.parametrize("sequence", ["ANGCATCATAAAAAAAAAA", "AAGCATCATAAAAAAAAAA"]) +def test_indexed_prefix_adapters_with_n_collision(sequence): + a1 = PrefixAdapter("AAGCGCCAT", max_errors=2, indels=False) + a2 = PrefixAdapter("AGGCATCAT", max_errors=2, indels=False) + ipa = IndexedPrefixAdapters([a1, a2]) + + result = ipa.match_to(sequence) + + assert isinstance(result, RemoveBeforeMatch) + assert result.adapter is a2 + + def test_inosine_wildcard(): adapter = BackAdapter("CTGIAIT", max_errors=0, min_overlap=3) match = adapter.match_to("GGCTGAATTGGG")