Skip to content

Commit

Permalink
Merge pull request #727 from rhpvorderman/usedoubles
Browse files Browse the repository at this point in the history
Make expected errors use doubles again for increased precision
  • Loading branch information
marcelm authored Sep 29, 2023
2 parents ddbe8fd + 6829744 commit 2cd48dd
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 34 deletions.
84 changes: 52 additions & 32 deletions src/cutadapt/expected_errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "emmintrin.h"
#endif

static const float SCORE_TO_ERROR_RATE[94] = {
static const double SCORE_TO_ERROR_RATE[94] = {
1.0L, // 0
0.7943282347242815L, // 1
0.6309573444801932L, // 2
Expand Down Expand Up @@ -100,15 +100,15 @@ static const float SCORE_TO_ERROR_RATE[94] = {
5.011872336272714E-10L, // 93
};

static inline float
static inline double
expected_errors_from_phreds(const uint8_t *phreds, size_t phreds_length, uint8_t base) {
const uint8_t *end_ptr = phreds + phreds_length;
const uint8_t *cursor = phreds;
float expected_errors = 0.0;
double expected_errors = 0.0;
uint8_t max_phred = 126 - base;
#ifdef __SSE2__
const uint8_t *vec_end_ptr = end_ptr - sizeof(__m128i);
__m128 accumulator = _mm_set1_ps(0.0);
__m128d accumulator = _mm_set1_pd(0.0);
while (cursor < vec_end_ptr) {
__m128i phred_array = _mm_loadu_si128((__m128i *)cursor);
__m128i illegal_phreds = _mm_cmpgt_epi8(phred_array, _mm_set1_epi8(126));
Expand All @@ -117,39 +117,59 @@ expected_errors_from_phreds(const uint8_t *phreds, size_t phreds_length, uint8_t
if (_mm_movemask_epi8(illegal_phreds)) {
return -1.0;
}
__m128 loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[0] - base],
SCORE_TO_ERROR_RATE[cursor[1] - base],
SCORE_TO_ERROR_RATE[cursor[2] - base],
SCORE_TO_ERROR_RATE[cursor[3] - base]
/* By explicitly setting multiple accumulators, the processor
can perform out of order execution for increased speed
See also: https://stackoverflow.com/a/36591776/16437839
*/
__m128d accumulator1 = _mm_add_pd(
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[0] - base],
SCORE_TO_ERROR_RATE[cursor[1] - base]
),
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[2] - base],
SCORE_TO_ERROR_RATE[cursor[3] - base]
)
);
accumulator = _mm_add_ps(accumulator, loader);
loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[4] - base],
SCORE_TO_ERROR_RATE[cursor[5] - base],
SCORE_TO_ERROR_RATE[cursor[6] - base],
SCORE_TO_ERROR_RATE[cursor[7] - base]
__m128d accumulator2 = _mm_add_pd(
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[4] - base],
SCORE_TO_ERROR_RATE[cursor[5] - base]
),
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[6] - base],
SCORE_TO_ERROR_RATE[cursor[7] - base]
)
);
accumulator = _mm_add_ps(accumulator, loader);
loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[8] - base],
SCORE_TO_ERROR_RATE[cursor[9] - base],
SCORE_TO_ERROR_RATE[cursor[10] - base],
SCORE_TO_ERROR_RATE[cursor[11] - base]
__m128d accumulator3 = _mm_add_pd(
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[8] - base],
SCORE_TO_ERROR_RATE[cursor[9] - base]
),
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[10] - base],
SCORE_TO_ERROR_RATE[cursor[11] - base]
)
);
accumulator = _mm_add_ps(accumulator, loader);
loader = _mm_set_ps(
SCORE_TO_ERROR_RATE[cursor[12] - base],
SCORE_TO_ERROR_RATE[cursor[13] - base],
SCORE_TO_ERROR_RATE[cursor[14] - base],
SCORE_TO_ERROR_RATE[cursor[15] - base]
);
accumulator = _mm_add_ps(accumulator, loader);
__m128d accumulator4 = _mm_add_pd(
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[12] - base],
SCORE_TO_ERROR_RATE[cursor[13] - base]
),
_mm_set_pd(
SCORE_TO_ERROR_RATE[cursor[14] - base],
SCORE_TO_ERROR_RATE[cursor[15] - base]
)
);
accumulator = _mm_add_pd(accumulator, accumulator1);
accumulator = _mm_add_pd(accumulator, accumulator2);
accumulator = _mm_add_pd(accumulator, accumulator3);
accumulator = _mm_add_pd(accumulator, accumulator4);
cursor += sizeof(__m128i);
}
float float_store[4];
_mm_store_ps(float_store, accumulator);
expected_errors = float_store[0] + float_store[1] + float_store[2] + float_store[3];
double double_store[2];
_mm_store_pd(double_store, accumulator);
expected_errors = double_store[0] + double_store[1];
#endif
while (cursor < end_ptr) {
uint8_t phred = *cursor - base;
Expand Down
2 changes: 1 addition & 1 deletion src/cutadapt/qualtrim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def expected_errors(str qualities, uint8_t base=33):
cdef:
uint8_t *quals = <uint8_t *>PyUnicode_DATA(qualities)
size_t qual_length = PyUnicode_GET_LENGTH(qualities)
float e = expected_errors_from_phreds(quals, qual_length, base)
double e = expected_errors_from_phreds(quals, qual_length, base)

if e < 0.0:
for q in qualities:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_invalid_pair_filter_mode():
(chr(43) * 3 + chr(33), 0.1, True), # 3 * 0.1 + 1
(chr(43) * 3 + chr(33), 0.33, False), # 3 * 0.1 + 1
(chr(43) * 3 + chr(33), 0.32, True), # 3 * 0.1 + 1
(chr(103) * 9 + chr(33), 0.1, True), # 9 * 10^-7 + 1
(chr(126) * 9 + chr(33), 0.1, True), # 9 * 10^-9.3 + 1
],
)
def test_too_high_average_error_rate(quals, rate, expected):
Expand Down

0 comments on commit 2cd48dd

Please sign in to comment.