Skip to content

Commit

Permalink
Fix colliding sequences in the index not being added to the index at all
Browse files Browse the repository at this point in the history
Closes #734
  • Loading branch information
marcelm committed Oct 4, 2023
1 parent ed7ec8d commit 511a8d2
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 6 deletions.
10 changes: 8 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,19 @@ development version
* Added a ``--max-average-error-rate``/``--max-aer`` option to add a filter
that checks if the number of expected errors divided by read length is above a
certain threshold. The expected errors are calculated the same as in
``--max-expected-errors`` and dividing by read length helps for reads that
``--max-expected-errors``, and dividing by read length helps for reads that
have varying lengths.
* :issue:`696`: Added a histogram of the lengths of removed poly-A tails to
the report.
* :issue:`696`: For paired-end data, ``--poly-a`` was changed to trim poly-T
"heads" on R2.
"heads" on R2 (this is still experimental as it is unclear whether that is
the desired behavior. Please give feedback!)
* A poly-A tail is only removed if it is at least three nucleotides long.
* :issue:`734`: Fixed misassignments during demultiplexing that would sometimes
happen when there are collisions between adapter sequences
(when the warning "sequence ... cannot be assigned uniquely" was printed).
Previously, sequences could incorrectly be assigned to an adapter that is not
actually the best match.

v4.4 (2023-04-28)
-----------------
Expand Down
6 changes: 2 additions & 4 deletions src/cutadapt/adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1309,8 +1309,7 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndex"]:
if other_matches == matches and not has_warned:
self._warn_similar(adapter, other_adapter, k, s, matches)
has_warned = True
else:
index[s] = (adapter, errors, matches)
index[s] = (adapter, errors, matches)
lengths.add(len(s))
else:
n = len(sequence)
Expand All @@ -1326,8 +1325,7 @@ def _make_index(self) -> Tuple[List[int], "AdapterIndex"]:
adapter, other_adapter, k, s, matches
)
has_warned = True
else:
index[s] = (adapter, errors, matches)
index[s] = (adapter, errors, matches)
lengths.add(n)
elapsed = time.time() - start_time
logger.info(
Expand Down
12 changes: 12 additions & 0 deletions tests/test_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,18 @@ def test_indexed_prefix_adapters_with_n_wildcard():
assert result.score == 6


@pytest.mark.parametrize("sequence", ["ANGCATCATAAAAAAAAAA", "AAGCATCATAAAAAAAAAA"])
def test_indexed_prefix_adapters_with_n_collision(sequence):
    """Regression test for issue #734 (colliding sequences in the index).

    Two prefix adapters that both allow two mismatches (no indels) are
    combined into one ``IndexedPrefixAdapters``. For each parametrized read,
    the lookup must return a ``RemoveBeforeMatch`` that is attributed to
    ``a2`` and not to ``a1``.
    """
    a1 = PrefixAdapter("AAGCGCCAT", max_errors=2, indels=False)
    a2 = PrefixAdapter("AGGCATCAT", max_errors=2, indels=False)
    ipa = IndexedPrefixAdapters([a1, a2])

    result = ipa.match_to(sequence)

    assert isinstance(result, RemoveBeforeMatch)
    # Identity check: the match must point at the very adapter object a2.
    assert result.adapter is a2


def test_inosine_wildcard():
adapter = BackAdapter("CTGIAIT", max_errors=0, min_overlap=3)
match = adapter.match_to("GGCTGAATTGGG")
Expand Down

0 comments on commit 511a8d2

Please sign in to comment.