Skip to content

Commit

Permalink
performance rollback for fio_ct_is_eq
Browse files Browse the repository at this point in the history
alignment seems less important than instruction ordering
  • Loading branch information
boazsegev committed Nov 13, 2023
1 parent 1163000 commit 60fa6db
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 66 deletions.
66 changes: 33 additions & 33 deletions fio-stl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1481,6 +1481,39 @@ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) {
uint64_t flag = 0;
const char *a = (const char *)a_;
const char *b = (const char *)b_;
/* any uneven bytes? */
if (bytes & 63) {
/* consume uneven byte head */
for (size_t i = 0; i < 8; ++i)
ua[i] = ub[i] = 0;
/* all these if statements can run in parallel */
if (bytes & 32) {
fio_memcpy32(ua, a);
fio_memcpy32(ub, b);
}
if (bytes & 16) {
fio_memcpy16(ua + 4, a + (bytes & 32));
fio_memcpy16(ub + 4, b + (bytes & 32));
}
if (bytes & 8) {
fio_memcpy8(ua + 6, a + (bytes & 48));
fio_memcpy8(ub + 6, b + (bytes & 48));
}
if (bytes & 4) {
fio_memcpy4((uint32_t *)ua + 14, a + (bytes & 56));
fio_memcpy4((uint32_t *)ub + 14, b + (bytes & 56));
}
if (bytes & 2) {
fio_memcpy2((uint16_t *)ua + 30, a + (bytes & 60));
fio_memcpy2((uint16_t *)ub + 30, b + (bytes & 60));
}
if (bytes & 1) {
((char *)ua)[62] = *(a + (bytes & 62));
((char *)ub)[62] = *(b + (bytes & 62));
}
for (size_t i = 0; i < 8; ++i)
flag |= ua[i] ^ ub[i];
}
for (size_t consumes = 63; consumes < bytes; consumes += 64) {
fio_memcpy64(ua, a);
fio_memcpy64(ub, b);
Expand All @@ -1489,39 +1522,6 @@ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) {
a += 64;
b += 64;
}
/* any uneven bytes? */
if (bytes & (~(size_t)63))
return !flag;
/* consume uneven byte tail */
for (size_t i = 0; i < 8; ++i)
ua[i] = ub[i] = 0;
/* all these if statements can run in parallel */
if (bytes & 32) {
fio_memcpy32(ua, a);
fio_memcpy32(ub, b);
}
if (bytes & 16) {
fio_memcpy16(ua + 4, a + (bytes & 32));
fio_memcpy16(ub + 4, b + (bytes & 32));
}
if (bytes & 8) {
fio_memcpy8(ua + 6, a + (bytes & 48));
fio_memcpy8(ub + 6, b + (bytes & 48));
}
if (bytes & 4) {
fio_memcpy4((uint32_t *)ua + 14, a + (bytes & 56));
fio_memcpy4((uint32_t *)ub + 14, b + (bytes & 56));
}
if (bytes & 2) {
fio_memcpy2((uint16_t *)ua + 30, a + (bytes & 60));
fio_memcpy2((uint16_t *)ub + 30, b + (bytes & 60));
}
if (bytes & 1) {
((char *)ua)[62] = *(a + (bytes & 62));
((char *)ub)[62] = *(b + (bytes & 62));
}
for (size_t i = 0; i < 8; ++i)
flag |= ua[i] ^ ub[i];
return !flag;
}

Expand Down
66 changes: 33 additions & 33 deletions fio-stl/000 core.h
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,39 @@ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) {
uint64_t flag = 0;
const char *a = (const char *)a_;
const char *b = (const char *)b_;
/* any uneven bytes? */
if (bytes & 63) {
/* consume uneven byte head */
for (size_t i = 0; i < 8; ++i)
ua[i] = ub[i] = 0;
/* all these if statements can run in parallel */
if (bytes & 32) {
fio_memcpy32(ua, a);
fio_memcpy32(ub, b);
}
if (bytes & 16) {
fio_memcpy16(ua + 4, a + (bytes & 32));
fio_memcpy16(ub + 4, b + (bytes & 32));
}
if (bytes & 8) {
fio_memcpy8(ua + 6, a + (bytes & 48));
fio_memcpy8(ub + 6, b + (bytes & 48));
}
if (bytes & 4) {
fio_memcpy4((uint32_t *)ua + 14, a + (bytes & 56));
fio_memcpy4((uint32_t *)ub + 14, b + (bytes & 56));
}
if (bytes & 2) {
fio_memcpy2((uint16_t *)ua + 30, a + (bytes & 60));
fio_memcpy2((uint16_t *)ub + 30, b + (bytes & 60));
}
if (bytes & 1) {
((char *)ua)[62] = *(a + (bytes & 62));
((char *)ub)[62] = *(b + (bytes & 62));
}
for (size_t i = 0; i < 8; ++i)
flag |= ua[i] ^ ub[i];
}
for (size_t consumes = 63; consumes < bytes; consumes += 64) {
fio_memcpy64(ua, a);
fio_memcpy64(ub, b);
Expand All @@ -1452,39 +1485,6 @@ FIO_SFUNC _Bool fio_ct_is_eq(const void *a_, const void *b_, size_t bytes) {
a += 64;
b += 64;
}
/* any uneven bytes? */
if (bytes & (~(size_t)63))
return !flag;
/* consume uneven byte tail */
for (size_t i = 0; i < 8; ++i)
ua[i] = ub[i] = 0;
/* all these if statements can run in parallel */
if (bytes & 32) {
fio_memcpy32(ua, a);
fio_memcpy32(ub, b);
}
if (bytes & 16) {
fio_memcpy16(ua + 4, a + (bytes & 32));
fio_memcpy16(ub + 4, b + (bytes & 32));
}
if (bytes & 8) {
fio_memcpy8(ua + 6, a + (bytes & 48));
fio_memcpy8(ub + 6, b + (bytes & 48));
}
if (bytes & 4) {
fio_memcpy4((uint32_t *)ua + 14, a + (bytes & 56));
fio_memcpy4((uint32_t *)ub + 14, b + (bytes & 56));
}
if (bytes & 2) {
fio_memcpy2((uint16_t *)ua + 30, a + (bytes & 60));
fio_memcpy2((uint16_t *)ub + 30, b + (bytes & 60));
}
if (bytes & 1) {
((char *)ua)[62] = *(a + (bytes & 62));
((char *)ub)[62] = *(b + (bytes & 62));
}
for (size_t i = 0; i < 8; ++i)
flag |= ua[i] ^ ub[i];
return !flag;
}

Expand Down

0 comments on commit 60fa6db

Please sign in to comment.