Skip to content

Commit

Permalink
Merge pull request #7681 from SparkiDev/kyber_improv_1
Browse files Browse the repository at this point in the history
Kyber: Improve performance
  • Loading branch information
JacobBarthelmeh authored Jun 25, 2024
2 parents 38335f4 + aa61f98 commit 22abd37
Show file tree
Hide file tree
Showing 10 changed files with 224 additions and 151 deletions.
64 changes: 43 additions & 21 deletions wolfcrypt/benchmark/benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,6 @@
#define BENCH_RSA 0x00000002
#define BENCH_RSA_SZ 0x00000004
#define BENCH_DH 0x00000010
#define BENCH_KYBER 0x00000020
#define BENCH_ECC_MAKEKEY 0x00001000
#define BENCH_ECC 0x00002000
#define BENCH_ECC_ENCRYPT 0x00004000
Expand All @@ -681,11 +680,22 @@
#define BENCH_SAKKE 0x80000000

/* Post-Quantum Asymmetric algorithms. */
#define BENCH_KYBER512 0x00000020
#define BENCH_KYBER768 0x00000040
#define BENCH_KYBER1024 0x00000080
#define BENCH_KYBER (BENCH_KYBER512 | BENCH_KYBER768 | \
BENCH_KYBER1024)
#define BENCH_FALCON_LEVEL1_SIGN 0x00000001
#define BENCH_FALCON_LEVEL5_SIGN 0x00000002
#define BENCH_DILITHIUM_LEVEL2_SIGN 0x04000000
#define BENCH_DILITHIUM_LEVEL3_SIGN 0x08000000
#define BENCH_DILITHIUM_LEVEL5_SIGN 0x10000000
#define BENCH_ML_DSA_44_SIGN 0x04000000
#define BENCH_ML_DSA_65_SIGN 0x08000000
#define BENCH_ML_DSA_87_SIGN 0x10000000
#define BENCH_ML_DSA_SIGN (BENCH_ML_DSA_44_SIGN | \
BENCH_ML_DSA_65_SIGN | \
BENCH_ML_DSA_87_SIGN)

/* Post-Quantum Asymmetric algorithms. (Part 2) */
#define BENCH_SPHINCS_FAST_LEVEL1_SIGN 0x00000001
Expand Down Expand Up @@ -959,9 +969,6 @@ static const bench_alg bench_asym_opt[] = {
#ifndef NO_DH
{ "-dh", BENCH_DH },
#endif
#ifdef WOLFSSL_HAVE_KYBER
{ "-kyber", BENCH_KYBER },
#endif
#ifdef HAVE_ECC
{ "-ecc-kg", BENCH_ECC_MAKEKEY },
{ "-ecc", BENCH_ECC },
Expand Down Expand Up @@ -1060,7 +1067,8 @@ static const bench_pq_hash_sig_alg bench_pq_hash_sig_opt[] = {
};
#endif /* BENCH_PQ_STATEFUL_HBS */

#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
/* The post-quantum-specific mapping of command line option to bit values and
* OQS name. */
typedef struct bench_pq_alg {
Expand All @@ -1073,18 +1081,25 @@ typedef struct bench_pq_alg {
/* All recognized post-quantum asymmetric algorithm choosing command line
* options. */
static const bench_pq_alg bench_pq_asym_opt[] = {
{ "-pq", 0xffffffff },
{ "-pq", 0xffffffff },
#ifdef WOLFSSL_HAVE_KYBER
{ "-kyber", BENCH_KYBER },
{ "-kyber512", BENCH_KYBER512 },
{ "-kyber768", BENCH_KYBER768 },
{ "-kyber1024", BENCH_KYBER1024 },
#endif
#if defined(HAVE_FALCON)
{ "-falcon_level1", BENCH_FALCON_LEVEL1_SIGN },
{ "-falcon_level5", BENCH_FALCON_LEVEL5_SIGN },
{ "-falcon_level1", BENCH_FALCON_LEVEL1_SIGN },
{ "-falcon_level5", BENCH_FALCON_LEVEL5_SIGN },
#endif
#if defined(HAVE_DILITHIUM)
{ "-dilithium_level2", BENCH_DILITHIUM_LEVEL2_SIGN },
{ "-dilithium_level3", BENCH_DILITHIUM_LEVEL3_SIGN },
{ "-dilithium_level5", BENCH_DILITHIUM_LEVEL5_SIGN },
{ "-ml-dsa-44", BENCH_DILITHIUM_LEVEL2_SIGN },
{ "-ml-dsa-65", BENCH_DILITHIUM_LEVEL3_SIGN },
{ "-ml-dsa-87", BENCH_DILITHIUM_LEVEL5_SIGN },
{ "-dilithium_level2", BENCH_DILITHIUM_LEVEL2_SIGN },
{ "-dilithium_level3", BENCH_DILITHIUM_LEVEL3_SIGN },
{ "-dilithium_level5", BENCH_DILITHIUM_LEVEL5_SIGN },
{ "-ml-dsa", BENCH_ML_DSA_SIGN },
{ "-ml-dsa-44", BENCH_ML_DSA_44_SIGN },
{ "-ml-dsa-65", BENCH_ML_DSA_65_SIGN },
{ "-ml-dsa-87", BENCH_ML_DSA_87_SIGN },
#endif
{ NULL, 0 }
};
Expand Down Expand Up @@ -3576,15 +3591,21 @@ static void* benchmarks_do(void* args)
#endif

#ifdef WOLFSSL_HAVE_KYBER
if (bench_all || (bench_asym_algs & BENCH_KYBER)) {
if (bench_all || (bench_pq_asym_algs & BENCH_KYBER)) {
#ifdef WOLFSSL_KYBER512
bench_kyber(KYBER512);
if (bench_pq_asym_algs & BENCH_KYBER512) {
bench_kyber(KYBER512);
}
#endif
#ifdef WOLFSSL_KYBER768
bench_kyber(KYBER768);
if (bench_pq_asym_algs & BENCH_KYBER768) {
bench_kyber(KYBER768);
}
#endif
#ifdef WOLFSSL_KYBER1024
bench_kyber(KYBER1024);
if (bench_pq_asym_algs & BENCH_KYBER1024) {
bench_kyber(KYBER1024);
}
#endif
}
#endif
Expand Down Expand Up @@ -14523,7 +14544,8 @@ static void Usage(void)
print_alg(bench_asym_opt[i].str, &line);
for (i=0; bench_other_opt[i].str != NULL; i++)
print_alg(bench_other_opt[i].str, &line);
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
for (i=0; bench_pq_asym_opt[i].str != NULL; i++)
print_alg(bench_pq_asym_opt[i].str, &line);
#if defined(HAVE_SPHINCS)
Expand Down Expand Up @@ -14799,8 +14821,8 @@ int wolfcrypt_benchmark_main(int argc, char** argv)
optMatched = 1;
}
}
#if defined(HAVE_FALCON) || defined(HAVE_DILITHIUM) || \
defined(HAVE_SPHINCS)
#if defined(WOLFSSL_HAVE_KYBER) || defined(HAVE_FALCON) || \
defined(HAVE_DILITHIUM) || defined(HAVE_SPHINCS)
/* Known asymmetric post-quantum algorithms */
for (i=0; !optMatched && bench_pq_asym_opt[i].str != NULL; i++) {
if (string_matches(argv[1], bench_pq_asym_opt[i].str)) {
Expand Down
110 changes: 56 additions & 54 deletions wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__)
#ifndef WOLFSSL_ARMASM_INLINE
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_ARMASM_NO_NEON
.text
.type L_sha3_arm2_neon_rt, %object
.size L_sha3_arm2_neon_rt, 192
Expand Down Expand Up @@ -85,60 +87,6 @@ L_sha3_arm2_neon_rt:
.word 0x0
.word 0x80008008
.word 0x80000000
.text
.type L_sha3_arm2_rt, %object
.size L_sha3_arm2_rt, 192
.align 4
L_sha3_arm2_rt:
.word 0x1
.word 0x0
.word 0x8082
.word 0x0
.word 0x808a
.word 0x80000000
.word 0x80008000
.word 0x80000000
.word 0x808b
.word 0x0
.word 0x80000001
.word 0x0
.word 0x80008081
.word 0x80000000
.word 0x8009
.word 0x80000000
.word 0x8a
.word 0x0
.word 0x88
.word 0x0
.word 0x80008009
.word 0x0
.word 0x8000000a
.word 0x0
.word 0x8000808b
.word 0x0
.word 0x8b
.word 0x80000000
.word 0x8089
.word 0x80000000
.word 0x8003
.word 0x80000000
.word 0x8002
.word 0x80000000
.word 0x80
.word 0x80000000
.word 0x800a
.word 0x0
.word 0x8000000a
.word 0x80000000
.word 0x80008081
.word 0x80000000
.word 0x8080
.word 0x80000000
.word 0x80000001
.word 0x0
.word 0x80008008
.word 0x80000000
#ifndef WOLFSSL_ARMASM_NO_NEON
.text
.align 4
.globl BlockSha3
Expand Down Expand Up @@ -407,6 +355,59 @@ L_sha3_arm32_neon_begin:
.size BlockSha3,.-BlockSha3
#endif /* !WOLFSSL_ARMASM_NO_NEON */
#ifdef WOLFSSL_ARMASM_NO_NEON
.text
.type L_sha3_arm2_rt, %object
.size L_sha3_arm2_rt, 192
.align 4
L_sha3_arm2_rt:
.word 0x1
.word 0x0
.word 0x8082
.word 0x0
.word 0x808a
.word 0x80000000
.word 0x80008000
.word 0x80000000
.word 0x808b
.word 0x0
.word 0x80000001
.word 0x0
.word 0x80008081
.word 0x80000000
.word 0x8009
.word 0x80000000
.word 0x8a
.word 0x0
.word 0x88
.word 0x0
.word 0x80008009
.word 0x0
.word 0x8000000a
.word 0x0
.word 0x8000808b
.word 0x0
.word 0x8b
.word 0x80000000
.word 0x8089
.word 0x80000000
.word 0x8003
.word 0x80000000
.word 0x8002
.word 0x80000000
.word 0x80
.word 0x80000000
.word 0x800a
.word 0x0
.word 0x8000000a
.word 0x80000000
.word 0x80008081
.word 0x80000000
.word 0x8080
.word 0x80000000
.word 0x80000001
.word 0x0
.word 0x80008008
.word 0x80000000
.text
.align 4
.globl BlockSha3
Expand Down Expand Up @@ -2391,6 +2392,7 @@ L_sha3_arm32_begin:
pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.size BlockSha3,.-BlockSha3
#endif /* WOLFSSL_ARMASM_NO_NEON */
#endif /* WOLFSSL_SHA3 */
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
#endif /* WOLFSSL_ARMASM */

Expand Down
41 changes: 21 additions & 20 deletions wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
#define __asm__ __asm
#define __volatile__ volatile
#endif /* __KEIL__ */
#ifdef WOLFSSL_SHA3
#ifndef WOLFSSL_ARMASM_NO_NEON
static const uint64_t L_sha3_arm2_neon_rt[] = {
0x0000000000000001UL, 0x0000000000008082UL,
0x800000000000808aUL, 0x8000000080008000UL,
Expand All @@ -66,29 +68,12 @@ static const uint64_t L_sha3_arm2_neon_rt[] = {
0x0000000080000001UL, 0x8000000080008008UL,
};

static const uint64_t L_sha3_arm2_rt[] = {
0x0000000000000001UL, 0x0000000000008082UL,
0x800000000000808aUL, 0x8000000080008000UL,
0x000000000000808bUL, 0x0000000080000001UL,
0x8000000080008081UL, 0x8000000000008009UL,
0x000000000000008aUL, 0x0000000000000088UL,
0x0000000080008009UL, 0x000000008000000aUL,
0x000000008000808bUL, 0x800000000000008bUL,
0x8000000000008089UL, 0x8000000000008003UL,
0x8000000000008002UL, 0x8000000000000080UL,
0x000000000000800aUL, 0x800000008000000aUL,
0x8000000080008081UL, 0x8000000000008080UL,
0x0000000080000001UL, 0x8000000080008008UL,
};

#include <wolfssl/wolfcrypt/sha3.h>

#ifndef WOLFSSL_ARMASM_NO_NEON
void BlockSha3(word64* state_p)
{
register word64* state asm ("r0") = (word64*)state_p;
register uint64_t* L_sha3_arm2_neon_rt_c asm ("r1") = (uint64_t*)&L_sha3_arm2_neon_rt;
register uint64_t* L_sha3_arm2_rt_c asm ("r2") = (uint64_t*)&L_sha3_arm2_rt;

__asm__ __volatile__ (
"sub sp, sp, #16\n\t"
Expand Down Expand Up @@ -348,16 +333,31 @@ void BlockSha3(word64* state_p)
"vst1.8 {d20-d23}, [%[state]]!\n\t"
"vst1.8 {d24}, [%[state]]\n\t"
"add sp, sp, #16\n\t"
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c)
: [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c)
:
: "memory", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
: "memory", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc"
);
}

#endif /* !WOLFSSL_ARMASM_NO_NEON */
#ifdef WOLFSSL_ARMASM_NO_NEON
/* Table of 24 64-bit constants, compiled only when WOLFSSL_ARMASM_NO_NEON is
 * defined.
 * NOTE(review): the values look like the Keccak-f[1600] round constants
 * (one per round) read by the non-NEON BlockSha3 - confirm against
 * FIPS 202. */
static const uint64_t L_sha3_arm2_rt[] = {
0x0000000000000001UL, 0x0000000000008082UL,
0x800000000000808aUL, 0x8000000080008000UL,
0x000000000000808bUL, 0x0000000080000001UL,
0x8000000080008081UL, 0x8000000000008009UL,
0x000000000000008aUL, 0x0000000000000088UL,
0x0000000080008009UL, 0x000000008000000aUL,
0x000000008000808bUL, 0x800000000000008bUL,
0x8000000000008089UL, 0x8000000000008003UL,
0x8000000000008002UL, 0x8000000000000080UL,
0x000000000000800aUL, 0x800000008000000aUL,
0x8000000080008081UL, 0x8000000000008080UL,
0x0000000080000001UL, 0x8000000080008008UL,
};

#include <wolfssl/wolfcrypt/sha3.h>

#ifdef WOLFSSL_ARMASM_NO_NEON
void BlockSha3(word64* state_p)
{
register word64* state asm ("r0") = (word64*)state_p;
Expand Down Expand Up @@ -2348,6 +2348,7 @@ void BlockSha3(word64* state_p)
}

#endif /* WOLFSSL_ARMASM_NO_NEON */
#endif /* WOLFSSL_SHA3 */
#endif /* !__aarch64__ && __arm__ && !__thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */
Expand Down
12 changes: 1 addition & 11 deletions wolfcrypt/src/port/arm/thumb2-aes-asm_c.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,12 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#include <wolfssl/wolfcrypt/error-crypt.h>

#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__thumb__)
#include <stdint.h>
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>
#ifdef WOLFSSL_ARMASM_INLINE

#ifdef WOLFSSL_ARMASM
#if !defined(__aarch64__) && defined(__thumb__)

#ifdef __IAR_SYSTEMS_ICC__
#define __asm__ asm
#define __volatile__ volatile
Expand Down Expand Up @@ -3056,7 +3049,4 @@ void AES_GCM_encrypt(const unsigned char* in, unsigned char* out, unsigned long
#endif /* !NO_AES */
#endif /* !__aarch64__ && __thumb__ */
#endif /* WOLFSSL_ARMASM */
#endif /* !defined(__aarch64__) && defined(__thumb__) */
#endif /* WOLFSSL_ARMASM */

#endif /* WOLFSSL_ARMASM_INLINE */
Loading

0 comments on commit 22abd37

Please sign in to comment.