From 7a9c477d5f3bc43a2967bfae64b502c9e12116e1 Mon Sep 17 00:00:00 2001 From: Bo Date: Wed, 15 Nov 2023 08:10:11 +0200 Subject: [PATCH] more nothings --- fio-stl.h | 85 ++++++++++++++++++-------------------------- fio-stl/000 core.h | 11 +++--- fio-stl/001 memalt.h | 60 +++++++++++-------------------- fio-stl/902 memalt.h | 14 ++++---- 4 files changed, 70 insertions(+), 100 deletions(-) diff --git a/fio-stl.h b/fio-stl.h index cd67ce4..22bbb8f 100644 --- a/fio-stl.h +++ b/fio-stl.h @@ -1489,16 +1489,14 @@ Constant-Time Comparison Test /** A timing attack resistant memory comparison function. */ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) { - uint64_t ua[8] FIO_ALIGN(16); - uint64_t ub[8] FIO_ALIGN(16); uint64_t flag = 0; const char *a = (const char *)a_; const char *b = (const char *)b_; /* any uneven bytes? */ if (bytes & 63) { /* consume uneven byte head */ - for (size_t i = 0; i < 8; ++i) - ua[i] = ub[i] = 0; + uint64_t ua[8] FIO_ALIGN(16) = {0}; + uint64_t ub[8] FIO_ALIGN(16) = {0}; /* all these if statements can run in parallel */ if (bytes & 32) { fio_memcpy32(ua, a); @@ -1530,6 +1528,8 @@ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) { b += bytes & 63; } for (size_t consumes = 63; consumes < bytes; consumes += 64) { + uint64_t ua[8] FIO_ALIGN(16); + uint64_t ub[8] FIO_ALIGN(16); fio_memcpy64(ua, a); fio_memcpy64(ub, b); for (size_t i = 0; i < 8; ++i) @@ -1547,6 +1547,7 @@ FIO_SFUNC _Bool fio_mem_is_eq(const void *a_, const void *b_, size_t bytes) { uint64_t flag = 0; const char *a = (const char *)a_; const char *b = (const char *)b_; + const char *e = a + bytes; if (*a != *b) return 1; /* any uneven bytes? */ @@ -1586,7 +1587,7 @@ FIO_SFUNC _Bool fio_mem_is_eq(const void *a_, const void *b_, size_t bytes) { a += bytes & 63; b += bytes & 63; } - for (size_t consumes = 63; consumes < bytes; consumes += 64) { + while (a < e) { fio_memcpy64(ua, a); fio_memcpy64(ub, b); for (size_t i = 0; i < 8; ++i) @@ -3341,47 +3342,30 @@ SFUNC FIO___ASAN_AVOID size_t fio_strlen(const char *str) { return 0; uintptr_t start = (uintptr_t)str; /* we must align memory, to avoid crushing when nearing last page boundary */ - switch ((start & 7)) { -#define FIO___MEMCHR_UNSAFE_STEP() \ - if (!str[0]) \ - return (uintptr_t)str - start; \ - ++str - case 1: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 2: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 3: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 4: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 5: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 6: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 7: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ -#undef FIO___MEMCHR_UNSAFE_STEP - } - - /* 8 byte aligned */ uint64_t flag = 0; - uint64_t map[8] FIO_ALIGN(16) = {0}; - uint64_t tmp[8] FIO_ALIGN(16) = {0}; - -#define FIO___STRLEN_CYCLE(i) \ - do { \ - map[i] = (*(const uint64_t *)(str + (i << 3))); \ - tmp[i] = \ - map[i] - UINT64_C(0x0101010101010101); /* is 0 or >= 0x80 --> 0x8X */ \ - map[i] = ~map[i]; /* is < 0x80) --> 0x8X */ \ - map[i] &= UINT64_C(0x8080808080808080); \ - map[i] &= tmp[i]; /* only 0x00 will now be 0x80 */ \ - flag |= map[i]; \ - } while (0) - - for (size_t aligner = 0; aligner < 8; ++aligner) { - FIO___STRLEN_CYCLE(0); - if (flag) + uint64_t map[8] FIO_ALIGN(16); + /* align to 4 bytes */ + switch (start & 7) { // clang-format off + case 1: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 2: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 3: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 4: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 5: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 6: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 7: if(*str++ == 0) return (uintptr_t)(str-1) - start; + } // clang-format on + /* align to 64 bytes */ + for (size_t i = 0; i < 9; ++i) { + if ((flag = fio_has_zero_byte64(*(uint64_t *)str))) goto found_nul_byte0; str += 8; } - str = FIO_PTR_MATH_RMASK(const char, str, 6); /* new loop alignment */ - for (;;) { /* loop while aligned on 64 byte boundary */ - for (size_t i = 0; i < 8; ++i) - FIO___STRLEN_CYCLE(i); + str = FIO_PTR_MATH_RMASK(const char, str, 6); + /* loop endlessly */ + for (;;) { + for (size_t i = 0; i < 8; ++i) { + flag |= (map[i] = fio_has_zero_byte64(((uint64_t *)str)[i])); + } if (flag) goto found_nul_byte8; str += 64; @@ -3397,8 +3381,7 @@ SFUNC FIO___ASAN_AVOID size_t fio_strlen(const char *str) { return (uintptr_t)str - start; found_nul_byte0: - flag = fio_has_byte2bitmap(map[0]); - str += fio_lsb_index_unsafe(flag); + str += fio_lsb_index_unsafe(fio_has_byte2bitmap(flag)); return (uintptr_t)str - start; } @@ -45332,7 +45315,7 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { size_t len = fio_strlen(membuf); membuf[i] = (char)((i & 0xFFU) | 1U); FIO_ASSERT(result == membuf + i, "fio_memchr failed."); - FIO_ASSERT(len == i, "fio_strlen failed."); + FIO_ASSERT(len == i, "fio_strlen failed (%zu != %zu).", len, i); } } #ifndef DEBUG @@ -45670,10 +45653,11 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { mem[mem_len - 2]++; } + FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) { - int cmp = fio_memcmp(a, b, mem_len); + int cmp = memcmp(a, b, mem_len); FIO_COMPILER_GUARD; if (cmp) { ++mem[twister--]; @@ -45684,17 +45668,16 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { } end = fio_time_micro(); fprintf(stderr, - "\tfio_memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", + "\tsystem memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", mem_len, (size_t)(end - start), repetitions); FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ - twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) { - int cmp = memcmp(a, b, mem_len); + int cmp = fio_memcmp(a, b, mem_len); FIO_COMPILER_GUARD; if (cmp) { ++mem[twister--]; @@ -45705,11 +45688,12 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { } end = fio_time_micro(); fprintf(stderr, - "\tsystem memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", + "\tfio_memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", mem_len, (size_t)(end - start), repetitions); + FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) { @@ -45729,6 +45713,7 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { (size_t)(end - start), repetitions); + FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) { diff --git a/fio-stl/000 core.h b/fio-stl/000 core.h index 4cd34bd..1383ff9 100644 --- a/fio-stl/000 core.h +++ b/fio-stl/000 core.h @@ -1452,16 +1452,14 @@ Constant-Time Comparison Test /** A timing attack resistant memory comparison function. */ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) { - uint64_t ua[8] FIO_ALIGN(16); - uint64_t ub[8] FIO_ALIGN(16); uint64_t flag = 0; const char *a = (const char *)a_; const char *b = (const char *)b_; /* any uneven bytes? */ if (bytes & 63) { /* consume uneven byte head */ - for (size_t i = 0; i < 8; ++i) - ua[i] = ub[i] = 0; + uint64_t ua[8] FIO_ALIGN(16) = {0}; + uint64_t ub[8] FIO_ALIGN(16) = {0}; /* all these if statements can run in parallel */ if (bytes & 32) { fio_memcpy32(ua, a); @@ -1493,6 +1491,8 @@ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) { b += bytes & 63; } for (size_t consumes = 63; consumes < bytes; consumes += 64) { + uint64_t ua[8] FIO_ALIGN(16); + uint64_t ub[8] FIO_ALIGN(16); fio_memcpy64(ua, a); fio_memcpy64(ub, b); for (size_t i = 0; i < 8; ++i) @@ -1510,6 +1510,7 @@ FIO_SFUNC _Bool fio_mem_is_eq(const void *a_, const void *b_, size_t bytes) { uint64_t flag = 0; const char *a = (const char *)a_; const char *b = (const char *)b_; + const char *e = a + bytes; if (*a != *b) return 1; /* any uneven bytes? */ @@ -1549,7 +1550,7 @@ FIO_SFUNC _Bool fio_mem_is_eq(const void *a_, const void *b_, size_t bytes) { a += bytes & 63; b += bytes & 63; } - for (size_t consumes = 63; consumes < bytes; consumes += 64) { + while (a < e) { fio_memcpy64(ua, a); fio_memcpy64(ub, b); for (size_t i = 0; i < 8; ++i) diff --git a/fio-stl/001 memalt.h b/fio-stl/001 memalt.h index 0e63114..1c79565 100644 --- a/fio-stl/001 memalt.h +++ b/fio-stl/001 memalt.h @@ -412,47 +412,30 @@ SFUNC FIO___ASAN_AVOID size_t fio_strlen(const char *str) { return 0; uintptr_t start = (uintptr_t)str; /* we must align memory, to avoid crushing when nearing last page boundary */ - switch ((start & 7)) { -#define FIO___MEMCHR_UNSAFE_STEP() \ - if (!str[0]) \ - return (uintptr_t)str - start; \ - ++str - case 1: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 2: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 3: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 4: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 5: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 6: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ - case 7: FIO___MEMCHR_UNSAFE_STEP(); /* fall through */ -#undef FIO___MEMCHR_UNSAFE_STEP - } - - /* 8 byte aligned */ uint64_t flag = 0; - uint64_t map[8] FIO_ALIGN(16) = {0}; - uint64_t tmp[8] FIO_ALIGN(16) = {0}; - -#define FIO___STRLEN_CYCLE(i) \ - do { \ - map[i] = (*(const uint64_t *)(str + (i << 3))); \ - tmp[i] = \ - map[i] - UINT64_C(0x0101010101010101); /* is 0 or >= 0x80 --> 0x8X */ \ - map[i] = ~map[i]; /* is < 0x80) --> 0x8X */ \ - map[i] &= UINT64_C(0x8080808080808080); \ - map[i] &= tmp[i]; /* only 0x00 will now be 0x80 */ \ - flag |= map[i]; \ - } while (0) - - for (size_t aligner = 0; aligner < 8; ++aligner) { - FIO___STRLEN_CYCLE(0); - if (flag) + uint64_t map[8] FIO_ALIGN(16); + /* align to 4 bytes */ + switch (start & 7) { // clang-format off + case 1: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 2: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 3: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 4: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 5: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 6: if(*str++ == 0) return (uintptr_t)(str-1) - start; + case 7: if(*str++ == 0) return (uintptr_t)(str-1) - start; + } // clang-format on + /* align to 64 bytes */ + for (size_t i = 0; i < 9; ++i) { + if ((flag = fio_has_zero_byte64(*(uint64_t *)str))) goto found_nul_byte0; str += 8; } - str = FIO_PTR_MATH_RMASK(const char, str, 6); /* new loop alignment */ - for (;;) { /* loop while aligned on 64 byte boundary */ - for (size_t i = 0; i < 8; ++i) - FIO___STRLEN_CYCLE(i); + str = FIO_PTR_MATH_RMASK(const char, str, 6); + /* loop endlessly */ + for (;;) { + for (size_t i = 0; i < 8; ++i) { + flag |= (map[i] = fio_has_zero_byte64(((uint64_t *)str)[i])); + } if (flag) goto found_nul_byte8; str += 64; @@ -468,8 +451,7 @@ SFUNC FIO___ASAN_AVOID size_t fio_strlen(const char *str) { return (uintptr_t)str - start; found_nul_byte0: - flag = fio_has_byte2bitmap(map[0]); - str += fio_lsb_index_unsafe(flag); + str += fio_lsb_index_unsafe(fio_has_byte2bitmap(flag)); return (uintptr_t)str - start; } diff --git a/fio-stl/902 memalt.h b/fio-stl/902 memalt.h index f592733..2790139 100644 --- a/fio-stl/902 memalt.h +++ b/fio-stl/902 memalt.h @@ -89,7 +89,7 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { size_t len = fio_strlen(membuf); membuf[i] = (char)((i & 0xFFU) | 1U); FIO_ASSERT(result == membuf + i, "fio_memchr failed."); - FIO_ASSERT(len == i, "fio_strlen failed."); + FIO_ASSERT(len == i, "fio_strlen failed (%zu != %zu).", len, i); } } #ifndef DEBUG @@ -427,10 +427,11 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { mem[mem_len - 2]++; } + FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) { - int cmp = fio_memcmp(a, b, mem_len); + int cmp = memcmp(a, b, mem_len); FIO_COMPILER_GUARD; if (cmp) { ++mem[twister--]; @@ -441,17 +442,16 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { } end = fio_time_micro(); fprintf(stderr, - "\tfio_memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", + "\tsystem memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", mem_len, (size_t)(end - start), repetitions); FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ - twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) { - int cmp = memcmp(a, b, mem_len); + int cmp = fio_memcmp(a, b, mem_len); FIO_COMPILER_GUARD; if (cmp) { ++mem[twister--]; @@ -462,11 +462,12 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { } end = fio_time_micro(); fprintf(stderr, - "\tsystem memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", + "\tfio_memcmp\t(up to %zu bytes):\t%zuus\t/ %zu\n", mem_len, (size_t)(end - start), repetitions); + FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) { @@ -486,6 +487,7 @@ FIO_SFUNC void FIO_NAME_TEST(stl, memalt)(void) { (size_t)(end - start), repetitions); + FIO_MEMCPY(b, a, mem_len); /* shouldn't be needed, but anyway */ twister = mem_len - 3; start = fio_time_micro(); for (size_t i = 0; i < repetitions; ++i) {