From 5dfdbb47c3f0a00ae566148a2664451aabe6321e Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Mon, 26 Aug 2024 11:48:37 +1000 Subject: [PATCH] Memory usage improvements kdf.c: wc_PRF() - No need for previous, reuse current. sha256.c: Transform_Sha256() - Add slow but small version for many register implementation. sp_int.h: Change 'used' and 'size' fields to 16-bit types when possible. sp_int.c: Fixes for 16-bit used. --- wolfcrypt/src/kdf.c | 34 +++++++++++------------------ wolfcrypt/src/sha256.c | 14 ++++++++++++ wolfcrypt/src/sp_int.c | 44 +++++++++++++++++++------------------- wolfssl/wolfcrypt/sp_int.h | 22 +++++++++++++++++-- 4 files changed, 69 insertions(+), 45 deletions(-) diff --git a/wolfcrypt/src/kdf.c b/wolfcrypt/src/kdf.c index 690774474b..ce80df8e86 100644 --- a/wolfcrypt/src/kdf.c +++ b/wolfcrypt/src/kdf.c @@ -84,11 +84,9 @@ int wc_PRF(byte* result, word32 resLen, const byte* secret, word32 lastTime; int ret = 0; #ifdef WOLFSSL_SMALL_STACK - byte* previous; byte* current; Hmac* hmac; #else - byte previous[P_HASH_MAX_SIZE]; /* max size */ byte current[P_HASH_MAX_SIZE]; /* max size */ Hmac hmac[1]; #endif @@ -153,19 +151,16 @@ int wc_PRF(byte* result, word32 resLen, const byte* secret, lastTime = times - 1; #ifdef WOLFSSL_SMALL_STACK - previous = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST); - current = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST); - hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_HMAC); - if (previous == NULL || current == NULL || hmac == NULL) { - XFREE(previous, heap, DYNAMIC_TYPE_DIGEST); + current = (byte*)XMALLOC(P_HASH_MAX_SIZE, heap, DYNAMIC_TYPE_DIGEST); + hmac = (Hmac*)XMALLOC(sizeof(Hmac), heap, DYNAMIC_TYPE_HMAC); + if (current == NULL || hmac == NULL) { XFREE(current, heap, DYNAMIC_TYPE_DIGEST); XFREE(hmac, heap, DYNAMIC_TYPE_HMAC); return MEMORY_E; } #endif #ifdef WOLFSSL_CHECK_MEM_ZERO - XMEMSET(previous, 0xff, P_HASH_MAX_SIZE); - wc_MemZero_Add("wc_PRF previous", previous, P_HASH_MAX_SIZE); + XMEMSET(current, 0xff, P_HASH_MAX_SIZE); wc_MemZero_Add("wc_PRF current", current, P_HASH_MAX_SIZE); wc_MemZero_Add("wc_PRF hmac", hmac, sizeof(Hmac)); #endif @@ -176,13 +171,13 @@ int wc_PRF(byte* result, word32 resLen, const byte* secret, if (ret == 0) ret = wc_HmacUpdate(hmac, seed, seedLen); /* A0 = seed */ if (ret == 0) - ret = wc_HmacFinal(hmac, previous); /* A1 */ + ret = wc_HmacFinal(hmac, current); /* A1 */ if (ret == 0) { word32 i; word32 idx = 0; for (i = 0; i < times; i++) { - ret = wc_HmacUpdate(hmac, previous, len); + ret = wc_HmacUpdate(hmac, current, len); if (ret != 0) break; ret = wc_HmacUpdate(hmac, seed, seedLen); @@ -198,10 +193,10 @@ int wc_PRF(byte* result, word32 resLen, const byte* secret, else { XMEMCPY(&result[idx], current, len); idx += len; - ret = wc_HmacUpdate(hmac, previous, len); + ret = wc_HmacUpdate(hmac, current, len); if (ret != 0) break; - ret = wc_HmacFinal(hmac, previous); + ret = wc_HmacFinal(hmac, current); if (ret != 0) break; } @@ -210,19 +205,16 @@ int wc_PRF(byte* result, word32 resLen, const byte* secret, wc_HmacFree(hmac); } - ForceZero(previous, P_HASH_MAX_SIZE); - ForceZero(current, P_HASH_MAX_SIZE); - ForceZero(hmac, sizeof(Hmac)); + ForceZero(current, P_HASH_MAX_SIZE); + ForceZero(hmac, sizeof(Hmac)); #if defined(WOLFSSL_CHECK_MEM_ZERO) - wc_MemZero_Check(previous, P_HASH_MAX_SIZE); - wc_MemZero_Check(current, P_HASH_MAX_SIZE); - wc_MemZero_Check(hmac, sizeof(Hmac)); + wc_MemZero_Check(current, P_HASH_MAX_SIZE); + wc_MemZero_Check(hmac, sizeof(Hmac)); #endif #ifdef WOLFSSL_SMALL_STACK - XFREE(previous, heap, DYNAMIC_TYPE_DIGEST); - XFREE(current, heap, DYNAMIC_TYPE_DIGEST); + XFREE(current, heap, DYNAMIC_TYPE_DIGEST); XFREE(hmac, heap, DYNAMIC_TYPE_HMAC); #endif diff --git a/wolfcrypt/src/sha256.c b/wolfcrypt/src/sha256.c index 2ba9ca62d1..136369151a 100644 --- a/wolfcrypt/src/sha256.c +++ b/wolfcrypt/src/sha256.c @@ -1255,6 +1255,9 @@ static int InitSha256(wc_Sha256* sha256) { word32 S[8], t0, t1; int i; + #ifdef USE_SLOW_SHA256 + int j; + #endif word32 W[WC_SHA256_BLOCK_SIZE/sizeof(word32)]; /* Copy digest to working vars */ @@ -1268,6 +1271,16 @@ static int InitSha256(wc_Sha256* sha256) S[7] = sha256->digest[7]; i = 0; + #ifdef USE_SLOW_SHA256 + for (j = 0; j < 16; j++) { + RND1(j); + } + for (i = 16; i < 64; i += 16) { + for (j = 0; j < 16; j++) { + RNDN(j); + } + } + #else RND1( 0); RND1( 1); RND1( 2); RND1( 3); RND1( 4); RND1( 5); RND1( 6); RND1( 7); RND1( 8); RND1( 9); RND1(10); RND1(11); @@ -1279,6 +1292,7 @@ static int InitSha256(wc_Sha256* sha256) RNDN( 8); RNDN( 9); RNDN(10); RNDN(11); RNDN(12); RNDN(13); RNDN(14); RNDN(15); } + #endif /* Add the working vars back into digest */ sha256->digest[0] += S[0]; diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 8c727d738f..9fc1d88226 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -5722,7 +5722,7 @@ int sp_cnt_lsb(const sp_int* a) unsigned int j; /* Count least significant words that are zero. */ - for (i = 0; i < a->used && a->dp[i] == 0; i++, bc += SP_WORD_SIZE) { + for (i = 0; (i < a->used) && (a->dp[i] == 0); i++, bc += SP_WORD_SIZE) { } /* Use 4-bit table to get count. */ @@ -8302,7 +8302,7 @@ int sp_rshb(const sp_int* a, int n, sp_int* r) } else { /* Move the bits down starting at least significant digit. */ - for (j = 0; i < a->used-1; i++, j++) + for (j = 0; i < (unsigned int)a->used - 1; i++, j++) r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n)); /* Most significant digit has no higher digit to pull from. */ r->dp[j] = a->dp[i] >> n; @@ -8936,7 +8936,7 @@ static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r) t[0] = h; h = 0; o = 0; - for (k = 1; k <= a->used - 1; k++) { + for (k = 1; k <= (unsigned int)a->used - 1; k++) { j = (int)k; dp = a->dp; for (; j >= 0; dp++, j--) { @@ -8947,7 +8947,7 @@ static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r) h = o; o = 0; } - for (; k <= (a->used - 1) * 2; k++) { + for (; k <= ((unsigned int)a->used - 1) * 2; k++) { i = k - (b->used - 1); dp = &b->dp[b->used - 1]; for (; i < a->used; i++, dp--) { @@ -9012,7 +9012,7 @@ static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r) t[0] = h; h = 0; o = 0; - for (k = 1; k <= b->used - 1; k++) { + for (k = 1; k <= (unsigned int)b->used - 1; k++) { i = 0; j = (int)k; for (; (i < a->used) && (j >= 0); i++, j--) { @@ -9023,7 +9023,7 @@ static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r) h = o; o = 0; } - for (; k <= (a->used - 1) + (b->used - 1); k++) { + for (; k <= (unsigned int)((a->used - 1) + (b->used - 1)); k++) { j = (int)(b->used - 1); i = k - (unsigned int)j; for (; (i < a->used) && (j >= 0); i++, j--) { @@ -9092,7 +9092,7 @@ static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r) #ifdef SP_WORD_OVERFLOW o = 0; #endif - for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) { + for (k = 1; k <= (unsigned int)((a->used - 1) + (b->used - 1)); k++) { i = k - (b->used - 1); i &= (((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U); j = (int)(k - i); @@ -14640,7 +14640,7 @@ static int _sp_sqr(const sp_int* a, sp_int* r) t[0] = h; h = 0; o = 0; - for (k = 1; k < (a->used + 1) / 2; k++) { + for (k = 1; k < ((unsigned int)a->used + 1) / 2; k++) { i = k; j = (int)(k - 1); for (; (j >= 0); i++, j--) { @@ -14752,7 +14752,7 @@ static int _sp_sqr(const sp_int* a, sp_int* r) #ifdef SP_WORD_OVERFLOW o = 0; #endif - for (k = 1; k <= (a->used - 1) * 2; k++) { + for (k = 1; k <= ((unsigned int)a->used - 1) * 2; k++) { i = k / 2; j = (int)(k - i); if (i == (unsigned int)j) { @@ -17148,7 +17148,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) /* Adding numbers into m->used * 2 digits - zero out unused digits. */ #ifndef WOLFSSL_NO_CT_OPS if (ct) { - for (i = 0; i < m->used * 2; i++) { + for (i = 0; i < (unsigned int)m->used * 2; i++) { a->dp[i] &= (sp_int_digit) (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i); @@ -17157,7 +17157,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) else #endif /* !WOLFSSL_NO_CT_OPS */ { - for (i = a->used; i < m->used * 2; i++) { + for (i = a->used; i < (unsigned int)m->used * 2; i++) { a->dp[i] = 0; } } @@ -17205,7 +17205,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) a->dp[i] = (sp_int_digit)w; w >>= SP_WORD_SIZE; /* 2.4. For j = 1 up to NumDigits(m)-2 */ - for (j = 1; j < m->used - 1; j++) { + for (j = 1; j < (unsigned int)m->used - 1; j++) { /* 2.4.1 a += mu * DigitMask(m, j) */ w += a->dp[i + j]; w += (sp_int_word)mu * m->dp[j]; @@ -17276,7 +17276,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) #ifndef WOLFSSL_NO_CT_OPS if (ct) { - for (i = 0; i < m->used * 2; i++) { + for (i = 0; i < (unsigned int)m->used * 2; i++) { a->dp[i] &= (sp_int_digit) (sp_int_sdigit)ctMaskIntGTE((int)(a->used-1), (int)i); @@ -17285,7 +17285,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) else #endif { - for (i = a->used; i < m->used * 2; i++) { + for (i = a->used; i < (unsigned int)m->used * 2; i++) { a->dp[i] = 0; } } @@ -17456,7 +17456,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) h = 0; SP_ASM_MUL_ADD_NO(l, h, mu, *(md++)); l = h; - for (j = 1; j + 1 < m->used - 1; j += 2) { + for (j = 1; j + 1 < (unsigned int)m->used - 1; j += 2) { h = 0; SP_ASM_ADDC(l, h, ad[j]); SP_ASM_MUL_ADD_NO(l, h, mu, *(md++)); @@ -17466,7 +17466,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) SP_ASM_MUL_ADD_NO(h, l, mu, *(md++)); ad[j] = h; } - for (; j < m->used - 1; j++) { + for (; j < (unsigned int)m->used - 1; j++) { h = 0; SP_ASM_ADDC(l, h, ad[j]); SP_ASM_MUL_ADD_NO(l, h, mu, *(md++)); @@ -17517,7 +17517,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */ mu = mp * ad[0]; /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */ - if ((i == m->used - 1) && (mask != 0)) { + if ((i == (unsigned int)m->used - 1) && (mask != 0)) { mu &= mask; } @@ -17528,7 +17528,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) ad[0] = l; l = h; /* 2.4. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */ - for (j = 1; j + 1 < m->used - 1; j += 2) { + for (j = 1; j + 1 < (unsigned int)m->used - 1; j += 2) { h = 0; /* 2.4.1. a += mu * DigitMask(m, j) */ SP_ASM_ADDC(l, h, ad[j + 0]); @@ -17540,7 +17540,7 @@ static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp, int ct) SP_ASM_MUL_ADD_NO(h, l, mu, *(md++)); ad[j + 1] = h; } - for (; j < m->used - 1; j++) { + for (; j < (unsigned int)m->used - 1; j++) { h = 0; /* 2.4.1. a += mu * DigitMask(m, j) */ SP_ASM_ADDC(l, h, ad[j]); @@ -17930,7 +17930,7 @@ int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz) d >>= 8; /* Stop if the output buffer is filled. */ if (j < 0) { - if ((i < a->used - 1) || (d > 0)) { + if ((i < (unsigned int)a->used - 1) || (d > 0)) { err = MP_VAL; } break; @@ -18004,7 +18004,7 @@ int sp_to_unsigned_bin_len_ct(const sp_int* a, byte* out, int outSz) out[j--] = (byte)(d & mask); d >>= 8; } - mask &= (sp_int_digit)0 - (i < a->used - 1); + mask &= (sp_int_digit)0 - (i < (unsigned int)a->used - 1); i += (unsigned int)(1 & mask); } } @@ -18020,7 +18020,7 @@ int sp_to_unsigned_bin_len_ct(const sp_int* a, byte* out, int outSz) i = 0; for (j = outSz - 1; j >= 0; j--) { out[j] = a->dp[i] & mask; - mask &= (sp_int_digit)0 - (i < a->used - 1); + mask &= (sp_int_digit)0 - (i < (unsigned int)a->used - 1); i += (unsigned int)(1 & mask); } } diff --git a/wolfssl/wolfcrypt/sp_int.h b/wolfssl/wolfcrypt/sp_int.h index 626af9e0b7..24f188cb96 100644 --- a/wolfssl/wolfcrypt/sp_int.h +++ b/wolfssl/wolfcrypt/sp_int.h @@ -872,13 +872,20 @@ while (0) * dp at end so user can allocate a smaller amount and set size. */ typedef struct sp_int { +#if SP_INT_DIGITS < 16384 + /** Number of words that contain data. */ + word16 used; + /** Maximum number of words in data. */ + word16 size; +#else /** Number of words that contain data. */ unsigned int used; /** Maximum number of words in data. */ unsigned int size; +#endif #ifdef WOLFSSL_SP_INT_NEGATIVE /** Indicates whether number is 0/positive or negative. */ - unsigned int sign; + sp_uint8 sign; #endif #ifdef HAVE_WOLF_BIGINT /** Unsigned binary (big endian) representation of number. */ @@ -889,12 +896,23 @@ typedef struct sp_int { } sp_int; typedef struct sp_int_minimal { +#if SP_INT_DIGITS < 16384 + /** Number of words that contain data. */ + word16 used; + /** Maximum number of words in data. */ + word16 size; +#else + /** Number of words that contain data. */ unsigned int used; + /** Maximum number of words in data. */ unsigned int size; +#endif #ifdef WOLFSSL_SP_INT_NEGATIVE - unsigned int sign; + /** Indicates whether number is 0/positive or negative. */ + sp_uint8 sign; #endif #ifdef HAVE_WOLF_BIGINT + /** Unsigned binary (big endian) representation of number. */ struct WC_BIGINT raw; #endif /** First digit of number. */