From 249537ea8654405f3d581d2ead90a8b8df84fe8a Mon Sep 17 00:00:00 2001
From: Aido
Date: Wed, 24 Jan 2024 14:25:04 +0000
Subject: [PATCH] Implement Montgomery multiplication

Fixes #362
---
Review notes: sys_cx_mont_init(), sys_cx_mont_mul() and
sys_cx_mont_from_montgomery() now release their temporary big numbers on
every exit path (previously a failing CX_CHECK jumped over the cleanup).
The cx_bn.c hunk header and the diffstat were updated accordingly; the
"index" line still carries the blob ids of the original patch.

 src/bolos/cx_bn.c         | 173 ++++++++++++++++++++++++++++++++++++++
 src/bolos/cxlib.h         |  16 ++++
 src/emulate_lnsp_1.0.c    |  15 ++++
 src/emulate_unified_sdk.c |  30 +++++++
 4 files changed, 234 insertions(+)

diff --git a/src/bolos/cx_bn.c b/src/bolos/cx_bn.c
index 99205844..71d24c4c 100644
--- a/src/bolos/cx_bn.c
+++ b/src/bolos/cx_bn.c
@@ -499,3 +499,176 @@ cx_err_t sys_cx_bn_gf2_n_mul(cx_bn_t bn_r, const cx_bn_t bn_a,
 end:
   return error;
 }
+
+/* ========================================================================= */
+/* ===                   MONTGOMERY MODULAR ARITHMETIC                   === */
+/* ========================================================================= */
+
+#define _2POWB \
+  { \
+    0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+        0x00, 0x00, 0x00, 0x00, 0x00 \
+  } /* 2^128 */
+#define _2POWBm1 \
+  { \
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, \
+        0xff, 0xff, 0xff, 0xff \
+  } /* 2^128 - 1 */
+
+/*
+ * Allocate the two big numbers (modulus n and constant h) of a Montgomery
+ * context, each with the given length. Returns the error of the first
+ * failing allocation, if any.
+ */
+cx_err_t sys_cx_mont_alloc(cx_bn_mont_ctx_t *ctx, size_t length)
+{
+  cx_err_t error;
+
+  CX_CHECK(sys_cx_bn_alloc(&ctx->n, length));
+  CX_CHECK(sys_cx_bn_alloc(&ctx->h, length));
+
+end:
+  return error;
+}
+
+/*
+ * Initialize a Montgomery context from the modulus n: copy n into ctx->n and
+ * set ctx->h to (2^(8 * nbytes(n)))^2 mod n.
+ *
+ * The temporaries are released on every exit path, including early exits
+ * taken through CX_CHECK.
+ */
+cx_err_t sys_cx_mont_init(cx_bn_mont_ctx_t *ctx, const cx_bn_t n)
+{
+  cx_err_t error;
+  size_t sizen;
+  uint8_t tu8_basis[] = _2POWB;
+  uint8_t tu8_basism1[] = _2POWBm1;
+  cx_bn_t basis, temp;
+  int basis_allocated = 0;
+  int temp_allocated = 0;
+
+  CX_CHECK(sys_cx_bn_nbytes(n, &sizen));
+  CX_CHECK(sys_cx_bn_alloc_init(&basis, sizen, tu8_basis,
+                                (size_t)sizeof(tu8_basis)));
+  basis_allocated = 1;
+  CX_CHECK(sys_cx_bn_alloc(&temp, sizen));
+  temp_allocated = 1;
+
+  /* copy modulus */
+  CX_CHECK(sys_cx_bn_copy((ctx->n), n));
+  /* -p^-1 mod 2^sizeword */
+  CX_CHECK(sys_cx_bn_reduce(temp, n, basis));
+
+  /* NOTE: the value stored in ctx->h here is overwritten further down. */
+  CX_CHECK(sys_cx_bn_mod_pow(ctx->h, temp, tu8_basism1, sizeof(tu8_basism1),
+                             basis)); /* 1/P mod 2^n */
+
+  /* 2^bitsizeof(n) mod n */
+  CX_CHECK(sys_cx_bn_xor(basis, basis, basis)); /* zero */
+  /* 2^(bitsize(p) - 1), so that it fits in memory before reduction */
+  CX_CHECK(sys_cx_bn_set_bit(basis, (sizen << 3) - 1));
+  CX_CHECK(sys_cx_bn_mod_add(temp, basis, basis, n)); /* 2^(bitsize(p)) */
+  CX_CHECK(sys_cx_bn_mod_mul(ctx->h, temp, temp, n)); /* 2^(bitsize(p))^2 */
+
+end:
+  if (temp_allocated) {
+    sys_cx_bn_destroy(&temp);
+  }
+  if (basis_allocated) {
+    sys_cx_bn_destroy(&basis);
+  }
+  return error;
+}
+
+/*
+ * Initialize a Montgomery context from a modulus n and an already computed
+ * second Montgomery constant h; both are copied into the context.
+ */
+cx_err_t sys_cx_mont_init2(cx_bn_mont_ctx_t *ctx, const cx_bn_t n,
+                           const cx_bn_t h)
+{
+  cx_err_t error = CX_OK; // By default, until some error occurs
+
+  CX_CHECK(sys_cx_bn_copy((ctx->n), n));
+  CX_CHECK(sys_cx_bn_copy((ctx->h), h));
+
+end:
+  return error;
+}
+
+/*
+ * A crude emulation of cx_mont_mul (intended to compute aR*bR=abR mod p):
+ * r = a * b * h^-1 mod n, where the inverse of ctx->h is obtained with
+ * sys_cx_bn_mod_invert_nprime(). Yes, an inversion to emulate a product.
+ *
+ * TODO: integrate some decent Montgomery implementation (maybe OpenSSL
+ * BN_mod_mul_montgomery()).
+ */
+cx_err_t sys_cx_mont_mul(cx_bn_t r, const cx_bn_t a, const cx_bn_t b,
+                         const cx_bn_mont_ctx_t *ctx)
+{
+  cx_err_t error = CX_OK; // By default, until some error occurs
+  size_t field;
+  cx_bn_t temp;
+  int temp_allocated = 0;
+
+  CX_CHECK(sys_cx_bn_nbytes(ctx->n, &field));
+  CX_CHECK(sys_cx_bn_alloc(&temp, field));
+  temp_allocated = 1;
+
+  CX_CHECK(sys_cx_bn_mod_invert_nprime(temp, ctx->h, ctx->n));
+  CX_CHECK(sys_cx_bn_mod_mul(temp, a, temp, ctx->n));
+  CX_CHECK(sys_cx_bn_mod_mul(r, b, temp, ctx->n));
+
+end:
+  /* Release the temporary even when one of the steps above failed. */
+  if (temp_allocated) {
+    sys_cx_bn_destroy(&temp);
+  }
+  return error;
+}
+
+/* Convert z to its Montgomery form: x = ctx->h * z mod ctx->n. */
+cx_err_t sys_cx_mont_to_montgomery(cx_bn_t x, const cx_bn_t z,
+                                   const cx_bn_mont_ctx_t *ctx)
+{
+  cx_err_t error = CX_OK; // By default, until some error occurs
+
+  CX_CHECK(sys_cx_bn_mod_mul(x, ctx->h, z, ctx->n));
+
+end:
+  return error;
+}
+
+/*
+ * Convert x back from its Montgomery form: z = sys_cx_mont_mul(1, x).
+ *
+ * The temporary holding 1 is released on every exit path; a failure to
+ * destroy it is reported only when no earlier error occurred.
+ */
+cx_err_t sys_cx_mont_from_montgomery(cx_bn_t z, const cx_bn_t x,
+                                     const cx_bn_mont_ctx_t *ctx)
+{
+  cx_err_t error = CX_OK; // By default, until some error occurs
+  cx_err_t destroy_error;
+  cx_bn_t temp;
+  size_t field;
+  int temp_allocated = 0;
+
+  CX_CHECK(sys_cx_bn_nbytes(ctx->h, &field));
+  CX_CHECK(sys_cx_bn_alloc(&temp, field));
+  temp_allocated = 1;
+
+  CX_CHECK(sys_cx_bn_set_u32(temp, 1));
+  CX_CHECK(sys_cx_mont_mul(z, temp, x, ctx));
+
+end:
+  if (temp_allocated) {
+    destroy_error = sys_cx_bn_destroy(&temp);
+    if (error == CX_OK) {
+      error = destroy_error;
+    }
+  }
+  return error;
+}
diff --git a/src/bolos/cxlib.h b/src/bolos/cxlib.h
index 747d2272..784e8615 100644
--- a/src/bolos/cxlib.h
+++ b/src/bolos/cxlib.h
@@ -66,6 +66,12 @@ typedef struct cx_mpi_ecpoint_s {
 } cx_mpi_ecpoint_t;
 
 
+// Montgomery context.
+typedef struct {
+  cx_bn_t n; // Modulus
+  cx_bn_t h; // Second Montgomery constant
+} cx_bn_mont_ctx_t;
+
 //-----------------------------------------------------------------------------
 // Prototypes
 //-----------------------------------------------------------------------------
@@ -201,6 +207,16 @@ cx_err_t sys_cx_bn_next_prime(const cx_bn_t bn_x);
 cx_err_t sys_cx_bn_gf2_n_mul(cx_bn_t bn_r, const cx_bn_t bn_a,
                              const cx_bn_t bn_b, const cx_bn_t bn_n,
                              const cx_bn_t bn_h);
+cx_err_t sys_cx_mont_alloc(cx_bn_mont_ctx_t *ctx, size_t length);
+cx_err_t sys_cx_mont_init(cx_bn_mont_ctx_t *ctx, const cx_bn_t n);
+cx_err_t sys_cx_mont_init2(cx_bn_mont_ctx_t *ctx, const cx_bn_t n,
+                           const cx_bn_t h);
+cx_err_t sys_cx_mont_to_montgomery(cx_bn_t x, const cx_bn_t z,
+                                   const cx_bn_mont_ctx_t *ctx);
+cx_err_t sys_cx_mont_from_montgomery(cx_bn_t z, const cx_bn_t x,
+                                     const cx_bn_mont_ctx_t *ctx);
+cx_err_t sys_cx_mont_mul(cx_bn_t r, const cx_bn_t a, const cx_bn_t b,
+                         const cx_bn_mont_ctx_t *ctx);
 
 // cx_ecdomain.c
 int cx_nid_from_curve(cx_curve_t curve);
diff --git a/src/emulate_lnsp_1.0.c b/src/emulate_lnsp_1.0.c
index daead103..c780eb02 100644
--- a/src/emulate_lnsp_1.0.c
+++ b/src/emulate_lnsp_1.0.c
@@ -269,6 +269,21 @@ int emulate_nanosp_1_0(unsigned long syscall, unsigned long *parameters,
     SYSCALL5(cx_bn_gf2_n_mul, "(%u, %u, %u, %u, %u)", uint32_t, r, uint32_t,
              a, uint32_t, b, uint32_t, n, uint32_t, h);
 
+    SYSCALL2(cx_mont_alloc, "(%p, %u)", void *, ctx, size_t, length);
+
+    SYSCALL2(cx_mont_init, "(%p, %u)", void *, ctx, uint32_t, n);
+
+    SYSCALL3(cx_mont_init2, "(%p, %u, %u)", void *, ctx, uint32_t, n, uint32_t, h);
+
+    SYSCALL4(cx_mont_mul, "(%u, %u, %u, %p)", uint32_t, r, uint32_t, a, uint32_t, b,
+             void *, ctx);
+
+    SYSCALL3(cx_mont_to_montgomery, "(%u, %u, %p)", uint32_t, x, uint32_t, z,
+             void *, ctx);
+
+    SYSCALL3(cx_mont_from_montgomery, "(%u, %u, %p)", uint32_t, z, uint32_t, x,
+             void *, ctx);
+
     // SYSCALLs that may exists on other SDK versions, but with a different ID:
     SYSCALL0i(os_perso_isonboarded, os_perso_isonboarded_2_0);
 
diff --git a/src/emulate_unified_sdk.c b/src/emulate_unified_sdk.c
index b63e00e0..a8793151 100644
--- a/src/emulate_unified_sdk.c
+++ b/src/emulate_unified_sdk.c
@@ -542,6 +542,36 @@ int emulate_syscall_cx(unsigned long syscall, unsigned long *parameters,
              uint32_t, n,
              uint32_t, h);
 
+    SYSCALL2(cx_mont_alloc, "(%p, %u)",
+             void *, ctx,
+             size_t, length);
+
+
+    SYSCALL2(cx_mont_init, "(%p, %u)",
+             void *, ctx,
+             uint32_t, n);
+
+    SYSCALL3(cx_mont_init2, "(%p, %u, %u)",
+             void *, ctx,
+             uint32_t, n,
+             uint32_t, h);
+
+    SYSCALL4(cx_mont_mul, "(%u, %u, %u, %p)",
+             uint32_t, r,
+             uint32_t, a,
+             uint32_t, b,
+             void *, ctx);
+
+    SYSCALL3(cx_mont_to_montgomery, "(%u, %u, %p)",
+             uint32_t, x,
+             uint32_t, z,
+             void *, ctx);
+
+    SYSCALL3(cx_mont_from_montgomery, "(%u, %u, %p)",
+             uint32_t, z,
+             uint32_t, x,
+             void *, ctx);
+
     SYSCALL10(cx_bls12381_key_gen, "(%u, %p, %u, %p, %u, %p, %u, %p, %p, %u)",
               uint8_t, mode,
               uint8_t *, secret,