From 8734f1251d7f509ed951d45f7c7d9cb6f06cc21b Mon Sep 17 00:00:00 2001
From: Sean Parkinson
Date: Thu, 20 Jun 2024 11:46:00 +1000
Subject: [PATCH] SHA-3 Thumb2, ARM32 ASM: Add assembly implementation

Add SHA-3 assembly implementation for Thumb2 and ARM32.
---
 src/include.am                                |   27 +
 wolfcrypt/src/port/arm/armv8-32-aes-asm.S     |    4 +-
 wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c   |    8 +-
 wolfcrypt/src/port/arm/armv8-32-curve25519.S  |    4 +-
 .../src/port/arm/armv8-32-curve25519_c.c      |    8 +-
 wolfcrypt/src/port/arm/armv8-32-sha256-asm.S  |    4 +-
 .../src/port/arm/armv8-32-sha256-asm_c.c      |    8 +-
 wolfcrypt/src/port/arm/armv8-32-sha3-asm.S    | 2400 +++++++++++++++++
 wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c  | 2356 ++++++++++++++++
 wolfcrypt/src/port/arm/armv8-32-sha512-asm.S  |    4 +-
 .../src/port/arm/armv8-32-sha512-asm_c.c      |    8 +-
 wolfcrypt/src/port/arm/thumb2-sha3-asm.S      | 1174 ++++++++
 wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c    | 1170 ++++++++
 wolfcrypt/src/port/arm/thumb2-sha512-asm.S    |    2 +-
 wolfcrypt/src/sha3.c                          |    3 +-
 wolfssl/wolfcrypt/sha3.h                      |    3 +-
 16 files changed, 7156 insertions(+), 27 deletions(-)
 create mode 100644 wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
 create mode 100644 wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
 create mode 100644 wolfcrypt/src/port/arm/thumb2-sha3-asm.S
 create mode 100644 wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c

diff --git a/src/include.am b/src/include.am
index 28677d23ef..1679e3b567 100644
--- a/src/include.am
+++ b/src/include.am
@@ -269,6 +269,15 @@ else
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha3-asm.S
 endif !BUILD_ARMASM_INLINE
 endif BUILD_ARMASM_NEON
+if BUILD_ARMASM
+if BUILD_ARMASM_INLINE
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c
+else
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha3-asm.S
+endif !BUILD_ARMASM_INLINE
+endif BUILD_ARMASM
 if !BUILD_X86_ASM
 if BUILD_INTELASM
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha3_asm.S
@@ -413,6 +422,15 @@ else
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha3-asm.S
 endif !BUILD_ARMASM_INLINE
 endif BUILD_ARMASM_NEON
+if BUILD_ARMASM
+if BUILD_ARMASM_INLINE
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c
+else
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha3-asm.S
+endif !BUILD_ARMASM_INLINE
+endif BUILD_ARMASM
 if BUILD_INTELASM
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha3_asm.S
 endif
@@ -748,6 +766,15 @@ else
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-sha3-asm.S
 endif !BUILD_ARMASM_INLINE
 endif BUILD_ARMASM_NEON
+if BUILD_ARMASM
+if BUILD_ARMASM_INLINE
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c
+else
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/armv8-32-sha3-asm.S
+src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/port/arm/thumb2-sha3-asm.S
+endif !BUILD_ARMASM_INLINE
+endif BUILD_ARMASM
 if !BUILD_X86_ASM
 if BUILD_INTELASM
 src_libwolfssl@LIBSUFFIX@_la_SOURCES += wolfcrypt/src/sha3_asm.S
diff --git
a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index 12578411f6..99812b3dbf 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -30,7 +30,7 @@ #include #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifndef WOLFSSL_ARMASM_INLINE #ifndef NO_AES #ifdef HAVE_AES_DECRYPT @@ -5304,7 +5304,7 @@ L_AES_GCM_encrypt_end: .size AES_GCM_encrypt,.-AES_GCM_encrypt #endif /* HAVE_AESGCM */ #endif /* !NO_AES */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index fbc60fbdfb..daaf1235d6 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -31,7 +31,7 @@ #include #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #include #ifdef HAVE_CONFIG_H #include @@ -41,7 +41,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -4786,9 +4786,9 @@ void AES_GCM_encrypt(const unsigned char* in_p, unsigned char* out_p, unsigned l #endif /* HAVE_AESGCM */ #endif /* !NO_AES */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519.S b/wolfcrypt/src/port/arm/armv8-32-curve25519.S index 52cdcf41a7..a09996238d 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519.S +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519.S @@ -30,7 +30,7 @@ #include #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifndef WOLFSSL_ARMASM_INLINE #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519) #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) @@ -8771,7 +8771,7 @@ sc_muladd: #endif /* !CURVE25519_SMALL || !ED25519_SMALL */ #endif /* HAVE_CURVE25519 || HAVE_ED25519 */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index 8981d4f0e3..2b97581d82 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -31,7 +31,7 @@ #include #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #include #ifdef HAVE_CONFIG_H #include @@ -41,7 +41,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -8995,9 +8995,9 @@ void sc_muladd(byte* s_p, const byte* a_p, const byte* b_p, const byte* c_p) #endif /* !CURVE25519_SMALL 
|| !ED25519_SMALL */ #endif /* HAVE_CURVE25519 || HAVE_ED25519 */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S index 65b4757737..381cd25584 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm.S @@ -30,7 +30,7 @@ #include #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifndef WOLFSSL_ARMASM_INLINE #ifndef NO_SHA256 #ifdef WOLFSSL_ARMASM_NO_NEON @@ -2865,7 +2865,7 @@ L_SHA256_transform_neon_len_start: .size Transform_Sha256_Len,.-Transform_Sha256_Len #endif /* WOLFSSL_ARMASM_NO_NEON */ #endif /* !NO_SHA256 */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) diff --git a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c index 3a5e200e6e..a10241f814 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha256-asm_c.c @@ -31,7 +31,7 @@ #include #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #include #ifdef HAVE_CONFIG_H #include @@ -41,7 +41,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -2802,9 +2802,9 @@ void Transform_Sha256_Len(wc_Sha256* sha256_p, const byte* data_p, word32 len_p) #endif /* WOLFSSL_ARMASM_NO_NEON */ #endif /* !NO_SHA256 */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S new file mode 100644 index 0000000000..f667eb6e43 --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S @@ -0,0 +1,2400 @@ +/* armv8-32-sha3-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha3/sha3.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S + */ + +#ifdef HAVE_CONFIG_H + #include +#endif /* HAVE_CONFIG_H */ +#include + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) +#ifndef WOLFSSL_ARMASM_INLINE + .text + .type L_sha3_arm2_neon_rt, %object + .size L_sha3_arm2_neon_rt, 192 + .align 4 +L_sha3_arm2_neon_rt: + .word 0x1 + .word 0x0 + .word 0x8082 + .word 0x0 + .word 0x808a + .word 0x80000000 + .word 0x80008000 + .word 0x80000000 + .word 0x808b + .word 0x0 + .word 0x80000001 + .word 0x0 + .word 0x80008081 + .word 0x80000000 + .word 0x8009 + .word 0x80000000 + .word 0x8a + .word 0x0 + .word 0x88 + .word 0x0 + .word 0x80008009 + .word 0x0 + .word 0x8000000a + .word 0x0 + .word 0x8000808b + .word 0x0 + .word 0x8b + .word 0x80000000 + .word 0x8089 + .word 0x80000000 + .word 0x8003 + .word 0x80000000 + .word 0x8002 + .word 0x80000000 + .word 0x80 + .word 0x80000000 + .word 0x800a + .word 0x0 + .word 0x8000000a + .word 0x80000000 + .word 0x80008081 + .word 0x80000000 + .word 0x8080 + .word 0x80000000 + .word 0x80000001 + .word 0x0 + .word 0x80008008 + .word 0x80000000 + .text + .type L_sha3_arm2_rt, %object + .size L_sha3_arm2_rt, 192 + .align 4 +L_sha3_arm2_rt: + .word 0x1 + .word 0x0 + .word 0x8082 + .word 0x0 + .word 0x808a + .word 0x80000000 + .word 0x80008000 + .word 0x80000000 + .word 0x808b + .word 0x0 + .word 0x80000001 + .word 0x0 + .word 0x80008081 + .word 0x80000000 + .word 0x8009 + .word 0x80000000 + .word 0x8a + .word 0x0 + .word 0x88 + .word 0x0 + .word 0x80008009 + .word 0x0 + .word 0x8000000a + .word 0x0 + .word 0x8000808b + .word 0x0 + .word 0x8b + .word 0x80000000 + .word 0x8089 + .word 0x80000000 + .word 0x8003 + .word 0x80000000 + .word 0x8002 + .word 0x80000000 + .word 0x80 + .word 0x80000000 + .word 0x800a + .word 0x0 + .word 0x8000000a + .word 0x80000000 + .word 0x80008081 + .word 0x80000000 + .word 0x8080 + .word 0x80000000 + .word 0x80000001 + .word 0x0 + .word 0x80008008 + .word 0x80000000 +#ifndef WOLFSSL_ARMASM_NO_NEON + .text + .align 4 + .globl BlockSha3 + .type BlockSha3, %function +BlockSha3: + vpush {d8-d15} + sub sp, sp, #16 + adr r1, L_sha3_arm2_neon_rt + mov r2, #24 + mov r3, sp + vld1.8 {d0-d3}, [r0]! + vld1.8 {d4-d7}, [r0]! + vld1.8 {d8-d11}, [r0]! + vld1.8 {d12-d15}, [r0]! + vld1.8 {d16-d19}, [r0]! + vld1.8 {d20-d23}, [r0]! 
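+ # Annotation (editor note, not generated output): the NEON path keeps
+ # one 64-bit Keccak lane per D register, s[0..24] in d0-d24, leaving
+ # d25-d29 for the column parities b[0..4] and d30/d31 as temporaries.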
+ vld1.8 {d24}, [r0] + sub r0, r0, #0xc0 +L_sha3_arm32_neon_begin: + # Calc b[0..4] + veor d26, d0, d5 + veor d27, d1, d6 + veor d28, d2, d7 + veor d29, d3, d8 + veor d25, d4, d9 + veor d26, d26, d10 + veor d27, d27, d11 + veor d28, d28, d12 + veor d29, d29, d13 + veor d25, d25, d14 + veor d26, d26, d15 + veor d27, d27, d16 + veor d28, d28, d17 + veor d29, d29, d18 + veor d25, d25, d19 + veor d26, d26, d20 + veor d27, d27, d21 + veor d28, d28, d22 + veor d29, d29, d23 + veor d25, d25, d24 + vst1.8 {d25, d26}, [r3] + # Calc t[0..4] and XOR into s[i*5..i*5+4] + # t[0] + vshr.u64 d30, d27, #63 + vshl.u64 d31, d27, #1 + veor d25, d25, d30 + veor d25, d25, d31 + # t[1] + vshr.u64 d30, d28, #63 + vshl.u64 d31, d28, #1 + veor d26, d26, d30 + veor d26, d26, d31 + # t[2] + vshr.u64 d30, d29, #63 + vshl.u64 d31, d29, #1 + veor d27, d27, d30 + veor d27, d27, d31 + # t[3] + vldr.8 d31, [r3] + vshr.u64 d30, d31, #63 + vshl.u64 d31, d31, #1 + veor d28, d28, d30 + veor d28, d28, d31 + # t[4] + vldr.8 d31, [r3, #8] + vshr.u64 d30, d31, #63 + vshl.u64 d31, d31, #1 + veor d29, d29, d30 + veor d29, d29, d31 + sub r3, r3, #16 + veor d0, d0, d25 + # s[1] => s[10] (tmp) + veor d30, d1, d26 + vshr.u64 d31, d30, #63 + vshl.u64 d30, d30, #1 + veor d30, d30, d31 + # s[6] => s[1] + veor d1, d6, d26 + vshr.u64 d31, d1, #20 + vshl.u64 d1, d1, #44 + veor d1, d1, d31 + # s[9] => s[6] + veor d6, d9, d29 + vshr.u64 d31, d6, #44 + vshl.u64 d6, d6, #20 + veor d6, d6, d31 + # s[22] => s[9] + veor d9, d22, d27 + vshr.u64 d31, d9, #3 + vshl.u64 d9, d9, #61 + veor d9, d9, d31 + # s[14] => s[22] + veor d22, d14, d29 + vshr.u64 d31, d22, #25 + vshl.u64 d22, d22, #39 + veor d22, d22, d31 + # s[20] => s[14] + veor d14, d20, d25 + vshr.u64 d31, d14, #46 + vshl.u64 d14, d14, #18 + veor d14, d14, d31 + # s[2] => s[20] + veor d20, d2, d27 + vshr.u64 d31, d20, #2 + vshl.u64 d20, d20, #62 + veor d20, d20, d31 + # s[12] => s[2] + veor d2, d12, d27 + vshr.u64 d31, d2, #21 + vshl.u64 d2, d2, #43 + veor d2, d2, d31 + # s[13] => s[12] + veor d12, d13, d28 + vshr.u64 d31, d12, #39 + vshl.u64 d12, d12, #25 + veor d12, d12, d31 + # s[19] => s[13] + veor d13, d19, d29 + vshr.u64 d31, d13, #56 + vshl.u64 d13, d13, #8 + veor d13, d13, d31 + # s[23] => s[19] + veor d19, d23, d28 + vshr.u64 d31, d19, #8 + vshl.u64 d19, d19, #56 + veor d19, d19, d31 + # s[15] => s[23] + veor d23, d15, d25 + vshr.u64 d31, d23, #23 + vshl.u64 d23, d23, #41 + veor d23, d23, d31 + # s[4] => s[15] + veor d15, d4, d29 + vshr.u64 d31, d15, #37 + vshl.u64 d15, d15, #27 + veor d15, d15, d31 + # s[24] => s[4] + veor d4, d24, d29 + vshr.u64 d31, d4, #50 + vshl.u64 d4, d4, #14 + veor d4, d4, d31 + # s[21] => s[24] + veor d24, d21, d26 + vshr.u64 d31, d24, #62 + vshl.u64 d24, d24, #2 + veor d24, d24, d31 + # s[8] => s[21] + veor d21, d8, d28 + vshr.u64 d31, d21, #9 + vshl.u64 d21, d21, #55 + veor d21, d21, d31 + # s[16] => s[8] + veor d8, d16, d26 + vshr.u64 d31, d8, #19 + vshl.u64 d8, d8, #45 + veor d8, d8, d31 + # s[5] => s[16] + veor d16, d5, d25 + vshr.u64 d31, d16, #28 + vshl.u64 d16, d16, #36 + veor d16, d16, d31 + # s[3] => s[5] + veor d5, d3, d28 + vshr.u64 d31, d5, #36 + vshl.u64 d5, d5, #28 + veor d5, d5, d31 + # s[18] => s[3] + veor d3, d18, d28 + vshr.u64 d31, d3, #43 + vshl.u64 d3, d3, #21 + veor d3, d3, d31 + # s[17] => s[18] + veor d18, d17, d27 + vshr.u64 d31, d18, #49 + vshl.u64 d18, d18, #15 + veor d18, d18, d31 + # s[11] => s[17] + veor d17, d11, d26 + vshr.u64 d31, d17, #54 + vshl.u64 d17, d17, #10 + veor d17, d17, d31 + # s[7] => s[11] + veor d11, d7, d27 + 
vshr.u64 d31, d11, #58 + vshl.u64 d11, d11, #6 + veor d11, d11, d31 + # s[10] => s[7] + veor d7, d10, d25 + vshr.u64 d31, d7, #61 + vshl.u64 d7, d7, #3 + veor d7, d7, d31 + # Row Mix + vmov d25, d0 + vmov d26, d1 + vbic d31, d2, d26 + veor d0, d25, d31 + vbic d31, d3, d2 + veor d1, d26, d31 + vbic d31, d4, d3 + veor d2, d2, d31 + vbic d31, d25, d4 + veor d3, d3, d31 + vbic d31, d26, d25 + veor d4, d4, d31 + vmov d25, d5 + vmov d26, d6 + vbic d31, d7, d26 + veor d5, d25, d31 + vbic d31, d8, d7 + veor d6, d26, d31 + vbic d31, d9, d8 + veor d7, d7, d31 + vbic d31, d25, d9 + veor d8, d8, d31 + vbic d31, d26, d25 + veor d9, d9, d31 + vmov d26, d11 + vbic d31, d12, d26 + veor d10, d30, d31 + vbic d31, d13, d12 + veor d11, d26, d31 + vbic d31, d14, d13 + veor d12, d12, d31 + vbic d31, d30, d14 + veor d13, d13, d31 + vbic d31, d26, d30 + veor d14, d14, d31 + vmov d25, d15 + vmov d26, d16 + vbic d31, d17, d26 + veor d15, d25, d31 + vbic d31, d18, d17 + veor d16, d26, d31 + vbic d31, d19, d18 + veor d17, d17, d31 + vbic d31, d25, d19 + veor d18, d18, d31 + vbic d31, d26, d25 + veor d19, d19, d31 + vmov d25, d20 + vmov d26, d21 + vbic d31, d22, d26 + veor d20, d25, d31 + vbic d31, d23, d22 + veor d21, d26, d31 + vbic d31, d24, d23 + veor d22, d22, d31 + vbic d31, d25, d24 + veor d23, d23, d31 + vbic d31, d26, d25 + veor d24, d24, d31 + vld1.8 {d30}, [r1]! + subs r2, r2, #1 + veor d0, d0, d30 + bne L_sha3_arm32_neon_begin + vst1.8 {d0-d3}, [r0]! + vst1.8 {d4-d7}, [r0]! + vst1.8 {d8-d11}, [r0]! + vst1.8 {d12-d15}, [r0]! + vst1.8 {d16-d19}, [r0]! + vst1.8 {d20-d23}, [r0]! + vst1.8 {d24}, [r0] + add sp, sp, #16 + vpop {d8-d15} + bx lr + .size BlockSha3,.-BlockSha3 +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#ifdef WOLFSSL_ARMASM_NO_NEON + .text + .align 4 + .globl BlockSha3 + .type BlockSha3, %function +BlockSha3: + push {r4, r5, r6, r7, r8, r9, r10, r11, lr} + sub sp, sp, #0xcc + adr r1, L_sha3_arm2_rt + mov r2, #12 +L_sha3_arm32_begin: + str r2, [sp, #200] + # Round even + # Calc b[4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #40] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #72] + ldr r7, [r0, #80] +#else + ldrd r6, r7, [r0, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #112] + ldr r9, [r0, #120] +#else + ldrd r8, r9, [r0, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #152] + ldr r11, [r0, #160] +#else + ldrd r10, r11, [r0, #152] +#endif + ldr r12, [r0, #192] + ldr lr, [r0, #196] + eor r2, r4, r6 + eor r3, r5, r7 + eor r2, r2, r8 + eor r3, r3, r9 + eor r2, r2, r10 + eor r3, r3, r11 + eor r2, r2, r12 + eor r3, r3, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r2, [sp, #32] + str r3, [sp, #40] +#else + strd r2, r3, [sp, #32] +#endif + # Calc b[1] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #16] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #56] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #88] + ldr r9, [r0, #96] +#else + ldrd r8, r9, [r0, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #128] + ldr r11, [r0, #136] +#else + ldrd r10, r11, [r0, #128] +#endif + ldr r12, [r0, #168] + ldr lr, [r0, #172] + eor r4, r4, r6 + eor r5, r5, r7 + eor r4, r4, r8 + eor r5, r5, 
r9 + eor r4, r4, r10 + eor r5, r5, r11 + eor r4, r4, r12 + eor r5, r5, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #16] +#else + strd r4, r5, [sp, #8] +#endif + # Calc t[0] + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # Calc b[0] and XOR t[0] into s[x*5+0] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #8] +#else + ldrd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #40] + ldr r7, [r0, #48] +#else + ldrd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #80] + ldr r9, [r0, #88] +#else + ldrd r8, r9, [r0, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #120] + ldr r11, [r0, #128] +#else + ldrd r10, r11, [r0, #120] +#endif + eor r12, r4, r6 + eor lr, r5, r7 + eor r12, r12, r8 + eor lr, lr, r9 + eor r12, r12, r10 + eor lr, lr, r11 + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0] + str r5, [r0, #8] +#else + strd r4, r5, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #40] + str r7, [r0, #48] +#else + strd r6, r7, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #80] + str r9, [r0, #88] +#else + strd r8, r9, [r0, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r0, #120] + str r11, [r0, #128] +#else + strd r10, r11, [r0, #120] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #160] + ldr r11, [r0, #168] +#else + ldrd r10, r11, [r0, #160] +#endif + eor r12, r12, r10 + eor lr, lr, r11 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r0, #160] + str r11, [r0, #168] +#else + strd r10, r11, [r0, #160] +#endif + str r12, [sp] + str lr, [sp, #4] + # Calc b[3] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #32] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #64] + ldr r7, [r0, #72] +#else + ldrd r6, r7, [r0, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #104] + ldr r9, [r0, #112] +#else + ldrd r8, r9, [r0, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #144] + ldr r11, [r0, #152] +#else + ldrd r10, r11, [r0, #144] +#endif + ldr r12, [r0, #184] + ldr lr, [r0, #188] + eor r4, r4, r6 + eor r5, r5, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r4, r4, r10 + eor r5, r5, r11 + eor r4, r4, r12 + eor r5, r5, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #24] + str r5, [sp, #32] +#else + strd r4, r5, [sp, #24] +#endif + # Calc t[2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp, #8] + ldr r3, [sp, #16] +#else + ldrd r2, r3, [sp, #8] +#endif + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # Calc b[2] and XOR t[2] into s[x*5+2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #16] + ldr r5, [r0, #24] +#else + ldrd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #56] + ldr r7, [r0, #64] +#else + ldrd r6, r7, [r0, #56] 
+#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #96] + ldr r9, [r0, #104] +#else + ldrd r8, r9, [r0, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #136] + ldr r11, [r0, #144] +#else + ldrd r10, r11, [r0, #136] +#endif + eor r12, r4, r6 + eor lr, r5, r7 + eor r12, r12, r8 + eor lr, lr, r9 + eor r12, r12, r10 + eor lr, lr, r11 + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #16] + str r5, [r0, #24] +#else + strd r4, r5, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #56] + str r7, [r0, #64] +#else + strd r6, r7, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #96] + str r9, [r0, #104] +#else + strd r8, r9, [r0, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r0, #136] + str r11, [r0, #144] +#else + strd r10, r11, [r0, #136] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #176] + ldr r11, [r0, #184] +#else + ldrd r10, r11, [r0, #176] +#endif + eor r12, r12, r10 + eor lr, lr, r11 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r0, #176] + str r11, [r0, #184] +#else + strd r10, r11, [r0, #176] +#endif + str r12, [sp, #16] + str lr, [sp, #20] + # Calc t[1] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp] + ldr r3, [sp, #8] +#else + ldrd r2, r3, [sp] +#endif + eor r2, r2, lr, lsr #31 + eor r3, r3, r12, lsr #31 + eor r2, r2, r12, lsl #1 + eor r3, r3, lr, lsl #1 + # XOR t[1] into s[x*5+1] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #8] + ldr r5, [r0, #16] +#else + ldrd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #48] + ldr r7, [r0, #56] +#else + ldrd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #88] + ldr r9, [r0, #96] +#else + ldrd r8, r9, [r0, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #128] + ldr r11, [r0, #136] +#else + ldrd r10, r11, [r0, #128] +#endif + ldr r12, [r0, #168] + ldr lr, [r0, #172] + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 + eor r12, r12, r2 + eor lr, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #16] +#else + strd r4, r5, [r0, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #48] + str r7, [r0, #56] +#else + strd r6, r7, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #88] + str r9, [r0, #96] +#else + strd r8, r9, [r0, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r0, #128] + str r11, [r0, #136] +#else + strd r10, r11, [r0, #128] +#endif + str r12, [r0, #168] + str lr, [r0, #172] + # Calc t[3] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp, #16] + ldr r3, [sp, #24] +#else + ldrd r2, r3, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #40] +#else + ldrd r4, r5, [sp, #32] +#endif + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # XOR t[3] into 
s[x*5+3] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #24] + ldr r5, [r0, #32] +#else + ldrd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #64] + ldr r7, [r0, #72] +#else + ldrd r6, r7, [r0, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #104] + ldr r9, [r0, #112] +#else + ldrd r8, r9, [r0, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #144] + ldr r11, [r0, #152] +#else + ldrd r10, r11, [r0, #144] +#endif + ldr r12, [r0, #184] + ldr lr, [r0, #188] + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 + eor r12, r12, r2 + eor lr, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #32] +#else + strd r4, r5, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #64] + str r7, [r0, #72] +#else + strd r6, r7, [r0, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #104] + str r9, [r0, #112] +#else + strd r8, r9, [r0, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r0, #144] + str r11, [r0, #152] +#else + strd r10, r11, [r0, #144] +#endif + str r12, [r0, #184] + str lr, [r0, #188] + # Calc t[4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp, #24] + ldr r3, [sp, #32] +#else + ldrd r2, r3, [sp, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #8] +#else + ldrd r4, r5, [sp] +#endif + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # XOR t[4] into s[x*5+4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #40] +#else + ldrd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #72] + ldr r7, [r0, #80] +#else + ldrd r6, r7, [r0, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #112] + ldr r9, [r0, #120] +#else + ldrd r8, r9, [r0, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #152] + ldr r11, [r0, #160] +#else + ldrd r10, r11, [r0, #152] +#endif + ldr r12, [r0, #192] + ldr lr, [r0, #196] + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 + eor r12, r12, r2 + eor lr, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #32] + str r5, [r0, #40] +#else + strd r4, r5, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [r0, #72] + str r7, [r0, #80] +#else + strd r6, r7, [r0, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [r0, #112] + str r9, [r0, #120] +#else + strd r8, r9, [r0, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [r0, #152] + str r11, [r0, #160] +#else + strd r10, r11, [r0, #152] +#endif + str r12, [r0, #192] + str lr, [r0, #196] + # Row Mix + # Row 0 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0] + ldr r3, [r0, #8] +#else + ldrd r2, r3, [r0] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #48] + ldr r5, [r0, #56] +#else + ldrd r4, r5, [r0, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #96] + ldr r7, [r0, #104] 
+#else + ldrd r6, r7, [r0, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #144] + ldr r9, [r0, #152] +#else + ldrd r8, r9, [r0, #144] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #192] + ldr r11, [r0, #200] +#else + ldrd r10, r11, [r0, #192] +#endif + # s[1] <<< 44 + mov lr, r4 + lsr r12, r5, #20 + lsr r4, r4, #20 + orr r4, r4, r5, lsl #12 + orr r5, r12, lr, lsl #12 + # s[2] <<< 43 + mov lr, r6 + lsr r12, r7, #21 + lsr r6, r6, #21 + orr r6, r6, r7, lsl #11 + orr r7, r12, lr, lsl #11 + # s[3] <<< 21 + lsr r12, r9, #11 + lsr lr, r8, #11 + orr r8, r12, r8, lsl #21 + orr r9, lr, r9, lsl #21 + # s[4] <<< 14 + lsr r12, r11, #18 + lsr lr, r10, #18 + orr r10, r12, r10, lsl #14 + orr r11, lr, r11, lsl #14 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [sp, #8] + str lr, [sp, #12] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [sp, #16] + str lr, [sp, #20] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [sp, #24] + str lr, [sp, #28] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [sp, #32] + str lr, [sp, #36] + # Get constant +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r1] + ldr r11, [r1, #4] +#else + ldrd r10, r11, [r1] +#endif + add r1, r1, #8 + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + # XOR in constant + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [sp] + str lr, [sp, #4] + # Row 1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0, #24] + ldr r3, [r0, #32] +#else + ldrd r2, r3, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #72] + ldr r5, [r0, #80] +#else + ldrd r4, r5, [r0, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #80] + ldr r7, [r0, #88] +#else + ldrd r6, r7, [r0, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #128] + ldr r9, [r0, #136] +#else + ldrd r8, r9, [r0, #128] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #176] + ldr r11, [r0, #184] +#else + ldrd r10, r11, [r0, #176] +#endif + # s[0] <<< 28 + lsr r12, r3, #4 + lsr lr, r2, #4 + orr r2, r12, r2, lsl #28 + orr r3, lr, r3, lsl #28 + # s[1] <<< 20 + lsr r12, r5, #12 + lsr lr, r4, #12 + orr r4, r12, r4, lsl #20 + orr r5, lr, r5, lsl #20 + # s[2] <<< 3 + lsr r12, r7, #29 + lsr lr, r6, #29 + orr r6, r12, r6, lsl #3 + orr r7, lr, r7, lsl #3 + # s[3] <<< 45 + mov lr, r8 + lsr r12, r9, #19 + lsr r8, r8, #19 + orr r8, r8, r9, lsl #13 + orr r9, r12, lr, lsl #13 + # s[4] <<< 61 + mov lr, r10 + lsr r12, r11, #3 + lsr r10, r10, #3 + orr r10, r10, r11, lsl #29 + orr r11, r12, lr, lsl #29 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [sp, #48] + str lr, [sp, #52] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [sp, #56] + str lr, [sp, #60] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [sp, #64] + str lr, [sp, #68] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [sp, #72] + str lr, [sp, #76] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [sp, #40] + str lr, [sp, #44] + # Row 2 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0, #8] + ldr r3, [r0, #16] +#else + ldrd r2, r3, [r0, #8] +#endif +#if 
defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #56] + ldr r5, [r0, #64] +#else + ldrd r4, r5, [r0, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #104] + ldr r7, [r0, #112] +#else + ldrd r6, r7, [r0, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #152] + ldr r9, [r0, #160] +#else + ldrd r8, r9, [r0, #152] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #160] + ldr r11, [r0, #168] +#else + ldrd r10, r11, [r0, #160] +#endif + # s[0] <<< 1 + lsr r12, r3, #31 + lsr lr, r2, #31 + orr r2, r12, r2, lsl #1 + orr r3, lr, r3, lsl #1 + # s[1] <<< 6 + lsr r12, r5, #26 + lsr lr, r4, #26 + orr r4, r12, r4, lsl #6 + orr r5, lr, r5, lsl #6 + # s[2] <<< 25 + lsr r12, r7, #7 + lsr lr, r6, #7 + orr r6, r12, r6, lsl #25 + orr r7, lr, r7, lsl #25 + # s[3] <<< 8 + lsr r12, r9, #24 + lsr lr, r8, #24 + orr r8, r12, r8, lsl #8 + orr r9, lr, r9, lsl #8 + # s[4] <<< 18 + lsr r12, r11, #14 + lsr lr, r10, #14 + orr r10, r12, r10, lsl #18 + orr r11, lr, r11, lsl #18 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [sp, #88] + str lr, [sp, #92] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [sp, #96] + str lr, [sp, #100] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [sp, #104] + str lr, [sp, #108] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [sp, #112] + str lr, [sp, #116] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [sp, #80] + str lr, [sp, #84] + # Row 3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0, #32] + ldr r3, [r0, #40] +#else + ldrd r2, r3, [r0, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #40] + ldr r5, [r0, #48] +#else + ldrd r4, r5, [r0, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #88] + ldr r7, [r0, #96] +#else + ldrd r6, r7, [r0, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #136] + ldr r9, [r0, #144] +#else + ldrd r8, r9, [r0, #136] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #184] + ldr r11, [r0, #192] +#else + ldrd r10, r11, [r0, #184] +#endif + # s[0] <<< 27 + lsr r12, r3, #5 + lsr lr, r2, #5 + orr r2, r12, r2, lsl #27 + orr r3, lr, r3, lsl #27 + # s[1] <<< 36 + mov lr, r4 + lsr r12, r5, #28 + lsr r4, r4, #28 + orr r4, r4, r5, lsl #4 + orr r5, r12, lr, lsl #4 + # s[2] <<< 10 + lsr r12, r7, #22 + lsr lr, r6, #22 + orr r6, r12, r6, lsl #10 + orr r7, lr, r7, lsl #10 + # s[3] <<< 15 + lsr r12, r9, #17 + lsr lr, r8, #17 + orr r8, r12, r8, lsl #15 + orr r9, lr, r9, lsl #15 + # s[4] <<< 56 + mov lr, r10 + lsr r12, r11, #8 + lsr r10, r10, #8 + orr r10, r10, r11, lsl #24 + orr r11, r12, lr, lsl #24 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [sp, #128] + str lr, [sp, #132] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [sp, #136] + str lr, [sp, #140] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [sp, #144] + str lr, [sp, #148] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [sp, #152] + str lr, [sp, #156] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [sp, #120] + str lr, [sp, #124] + # Row 4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 
7) + ldr r2, [r0, #16] + ldr r3, [r0, #24] +#else + ldrd r2, r3, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #64] + ldr r5, [r0, #72] +#else + ldrd r4, r5, [r0, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [r0, #112] + ldr r7, [r0, #120] +#else + ldrd r6, r7, [r0, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [r0, #120] + ldr r9, [r0, #128] +#else + ldrd r8, r9, [r0, #120] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r0, #168] + ldr r11, [r0, #176] +#else + ldrd r10, r11, [r0, #168] +#endif + # s[0] <<< 62 + mov lr, r2 + lsr r12, r3, #2 + lsr r2, r2, #2 + orr r2, r2, r3, lsl #30 + orr r3, r12, lr, lsl #30 + # s[1] <<< 55 + mov lr, r4 + lsr r12, r5, #9 + lsr r4, r4, #9 + orr r4, r4, r5, lsl #23 + orr r5, r12, lr, lsl #23 + # s[2] <<< 39 + mov lr, r6 + lsr r12, r7, #25 + lsr r6, r6, #25 + orr r6, r6, r7, lsl #7 + orr r7, r12, lr, lsl #7 + # s[3] <<< 41 + mov lr, r8 + lsr r12, r9, #23 + lsr r8, r8, #23 + orr r8, r8, r9, lsl #9 + orr r9, r12, lr, lsl #9 + # s[4] <<< 2 + lsr r12, r11, #30 + lsr lr, r10, #30 + orr r10, r12, r10, lsl #2 + orr r11, lr, r11, lsl #2 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [sp, #168] + str lr, [sp, #172] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [sp, #176] + str lr, [sp, #180] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [sp, #184] + str lr, [sp, #188] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [sp, #192] + str lr, [sp, #196] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [sp, #160] + str lr, [sp, #164] + # Round odd + # Calc b[4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #40] +#else + ldrd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #72] + ldr r7, [sp, #80] +#else + ldrd r6, r7, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #112] + ldr r9, [sp, #120] +#else + ldrd r8, r9, [sp, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #152] + ldr r11, [sp, #160] +#else + ldrd r10, r11, [sp, #152] +#endif + ldr r12, [sp, #192] + ldr lr, [sp, #196] + eor r2, r4, r6 + eor r3, r5, r7 + eor r2, r2, r8 + eor r3, r3, r9 + eor r2, r2, r10 + eor r3, r3, r11 + eor r2, r2, r12 + eor r3, r3, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r2, [r0, #32] + str r3, [r0, #40] +#else + strd r2, r3, [r0, #32] +#endif + # Calc b[1] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #16] +#else + ldrd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #48] + ldr r7, [sp, #56] +#else + ldrd r6, r7, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #88] + ldr r9, [sp, #96] +#else + ldrd r8, r9, [sp, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #128] + ldr r11, [sp, #136] +#else + ldrd r10, r11, [sp, #128] +#endif + ldr r12, [sp, #168] + ldr lr, [sp, #172] + eor r4, r4, r6 + eor r5, r5, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r4, r4, r10 + eor r5, r5, r11 + eor r4, r4, r12 + eor r5, r5, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #8] + str r5, [r0, #16] +#else 
+ strd r4, r5, [r0, #8] +#endif + # Calc t[0] + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # Calc b[0] and XOR t[0] into s[x*5+0] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp] + ldr r5, [sp, #8] +#else + ldrd r4, r5, [sp] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #40] + ldr r7, [sp, #48] +#else + ldrd r6, r7, [sp, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #80] + ldr r9, [sp, #88] +#else + ldrd r8, r9, [sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #120] + ldr r11, [sp, #128] +#else + ldrd r10, r11, [sp, #120] +#endif + eor r12, r4, r6 + eor lr, r5, r7 + eor r12, r12, r8 + eor lr, lr, r9 + eor r12, r12, r10 + eor lr, lr, r11 + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp] + str r5, [sp, #8] +#else + strd r4, r5, [sp] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #40] + str r7, [sp, #48] +#else + strd r6, r7, [sp, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #80] + str r9, [sp, #88] +#else + strd r8, r9, [sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #120] + str r11, [sp, #128] +#else + strd r10, r11, [sp, #120] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #160] + ldr r11, [sp, #168] +#else + ldrd r10, r11, [sp, #160] +#endif + eor r12, r12, r10 + eor lr, lr, r11 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #160] + str r11, [sp, #168] +#else + strd r10, r11, [sp, #160] +#endif + str r12, [r0] + str lr, [r0, #4] + # Calc b[3] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #24] + ldr r5, [sp, #32] +#else + ldrd r4, r5, [sp, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #64] + ldr r7, [sp, #72] +#else + ldrd r6, r7, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #104] + ldr r9, [sp, #112] +#else + ldrd r8, r9, [sp, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #144] + ldr r11, [sp, #152] +#else + ldrd r10, r11, [sp, #144] +#endif + ldr r12, [sp, #184] + ldr lr, [sp, #188] + eor r4, r4, r6 + eor r5, r5, r7 + eor r4, r4, r8 + eor r5, r5, r9 + eor r4, r4, r10 + eor r5, r5, r11 + eor r4, r4, r12 + eor r5, r5, lr +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [r0, #24] + str r5, [r0, #32] +#else + strd r4, r5, [r0, #24] +#endif + # Calc t[2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0, #8] + ldr r3, [r0, #16] +#else + ldrd r2, r3, [r0, #8] +#endif + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # Calc b[2] and XOR t[2] into s[x*5+2] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #16] + ldr r5, [sp, #24] +#else + ldrd r4, r5, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #56] + ldr r7, [sp, #64] +#else + ldrd r6, r7, [sp, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #96] + ldr r9, [sp, #104] +#else + ldrd r8, r9, [sp, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #136] + ldr r11, [sp, #144] +#else + ldrd r10, r11, [sp, #136] +#endif + eor r12, r4, r6 + eor lr, r5, r7 + eor r12, r12, r8 + eor lr, lr, r9 + eor r12, r12, r10 + eor lr, lr, r11 + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #16] + str r5, [sp, #24] +#else + strd r4, r5, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #56] + str r7, [sp, #64] +#else + strd r6, r7, [sp, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #96] + str r9, [sp, #104] +#else + strd r8, r9, [sp, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #136] + str r11, [sp, #144] +#else + strd r10, r11, [sp, #136] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #176] + ldr r11, [sp, #184] +#else + ldrd r10, r11, [sp, #176] +#endif + eor r12, r12, r10 + eor lr, lr, r11 + eor r10, r10, r2 + eor r11, r11, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #176] + str r11, [sp, #184] +#else + strd r10, r11, [sp, #176] +#endif + str r12, [r0, #16] + str lr, [r0, #20] + # Calc t[1] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0] + ldr r3, [r0, #8] +#else + ldrd r2, r3, [r0] +#endif + eor r2, r2, lr, lsr #31 + eor r3, r3, r12, lsr #31 + eor r2, r2, r12, lsl #1 + eor r3, r3, lr, lsl #1 + # XOR t[1] into s[x*5+1] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #8] + ldr r5, [sp, #16] +#else + ldrd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #48] + ldr r7, [sp, #56] +#else + ldrd r6, r7, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #88] + ldr r9, [sp, #96] +#else + ldrd r8, r9, [sp, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #128] + ldr r11, [sp, #136] +#else + ldrd r10, r11, [sp, #128] +#endif + ldr r12, [sp, #168] + ldr lr, [sp, #172] + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 + eor r12, r12, r2 + eor lr, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #8] + str r5, [sp, #16] +#else + strd r4, r5, [sp, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #48] + str r7, [sp, #56] +#else + strd r6, r7, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #88] + str r9, [sp, #96] +#else + strd r8, r9, [sp, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #128] + str r11, [sp, #136] +#else + strd r10, r11, [sp, #128] +#endif + str r12, [sp, #168] + str lr, [sp, #172] + # Calc t[3] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0, #16] + ldr r3, [r0, #24] +#else + ldrd r2, r3, [r0, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0, #32] + ldr r5, [r0, #40] +#else + ldrd r4, r5, [r0, #32] +#endif + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # XOR t[3] into s[x*5+3] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #24] + ldr r5, [sp, #32] +#else + ldrd r4, r5, [sp, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #64] + ldr r7, [sp, #72] +#else + ldrd r6, r7, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #104] + ldr r9, [sp, #112] +#else + ldrd r8, r9, [sp, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #144] + ldr r11, [sp, #152] +#else + ldrd r10, r11, [sp, #144] +#endif + ldr r12, [sp, #184] + ldr lr, [sp, #188] + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 + eor r12, r12, r2 + eor lr, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #24] + str r5, [sp, #32] +#else + strd r4, r5, [sp, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #64] + str r7, [sp, #72] +#else + strd r6, r7, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #104] + str r9, [sp, #112] +#else + strd r8, r9, [sp, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #144] + str r11, [sp, #152] +#else + strd r10, r11, [sp, #144] +#endif + str r12, [sp, #184] + str lr, [sp, #188] + # Calc t[4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [r0, #24] + ldr r3, [r0, #32] +#else + ldrd r2, r3, [r0, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [r0] + ldr r5, [r0, #8] +#else + ldrd r4, r5, [r0] +#endif + eor r2, r2, r5, lsr #31 + eor r3, r3, r4, lsr #31 + eor r2, r2, r4, lsl #1 + eor r3, r3, r5, lsl #1 + # XOR t[4] into s[x*5+4] +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #32] + ldr r5, [sp, #40] +#else + ldrd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #72] + ldr r7, [sp, #80] +#else + ldrd r6, r7, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #112] + ldr r9, [sp, #120] +#else + ldrd r8, r9, [sp, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #152] + ldr r11, [sp, #160] +#else + ldrd r10, r11, [sp, #152] +#endif + ldr r12, [sp, #192] + ldr lr, [sp, #196] + eor r4, r4, r2 + eor r5, r5, r3 + eor r6, r6, r2 + eor r7, r7, r3 + eor r8, r8, r2 + eor r9, r9, r3 + eor r10, r10, r2 + eor r11, r11, r3 + eor r12, r12, r2 + eor lr, lr, r3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r4, [sp, #32] + str r5, [sp, #40] +#else + strd r4, r5, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r6, [sp, #72] + str r7, [sp, #80] +#else + strd r6, r7, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r8, [sp, #112] + str r9, [sp, #120] +#else + strd r8, r9, [sp, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + str r10, [sp, #152] + str r11, [sp, #160] +#else + strd r10, r11, [sp, #152] +#endif + str r12, [sp, #192] + str lr, [sp, #196] + # Row Mix + # Row 0 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp] + ldr r3, [sp, #8] +#else + ldrd r2, r3, [sp] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #48] + ldr r5, [sp, #56] +#else + ldrd r4, r5, [sp, #48] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #96] + ldr r7, [sp, #104] +#else + ldrd r6, r7, [sp, #96] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #144] + ldr r9, [sp, #152] +#else + ldrd r8, r9, [sp, #144] +#endif 
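+ # Annotation (editor note, not generated output): rounds run in pairs;
+ # the even round wrote its output to the scratch area at sp, so this
+ # odd round reads its lanes from sp and stores results back to r0.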
+#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #192] + ldr r11, [sp, #200] +#else + ldrd r10, r11, [sp, #192] +#endif + # s[1] <<< 44 + mov lr, r4 + lsr r12, r5, #20 + lsr r4, r4, #20 + orr r4, r4, r5, lsl #12 + orr r5, r12, lr, lsl #12 + # s[2] <<< 43 + mov lr, r6 + lsr r12, r7, #21 + lsr r6, r6, #21 + orr r6, r6, r7, lsl #11 + orr r7, r12, lr, lsl #11 + # s[3] <<< 21 + lsr r12, r9, #11 + lsr lr, r8, #11 + orr r8, r12, r8, lsl #21 + orr r9, lr, r9, lsl #21 + # s[4] <<< 14 + lsr r12, r11, #18 + lsr lr, r10, #18 + orr r10, r12, r10, lsl #14 + orr r11, lr, r11, lsl #14 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [r0, #8] + str lr, [r0, #12] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [r0, #16] + str lr, [r0, #20] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [r0, #24] + str lr, [r0, #28] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [r0, #32] + str lr, [r0, #36] + # Get constant +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [r1] + ldr r11, [r1, #4] +#else + ldrd r10, r11, [r1] +#endif + add r1, r1, #8 + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + # XOR in constant + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [r0] + str lr, [r0, #4] + # Row 1 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp, #24] + ldr r3, [sp, #32] +#else + ldrd r2, r3, [sp, #24] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #72] + ldr r5, [sp, #80] +#else + ldrd r4, r5, [sp, #72] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #80] + ldr r7, [sp, #88] +#else + ldrd r6, r7, [sp, #80] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #128] + ldr r9, [sp, #136] +#else + ldrd r8, r9, [sp, #128] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #176] + ldr r11, [sp, #184] +#else + ldrd r10, r11, [sp, #176] +#endif + # s[0] <<< 28 + lsr r12, r3, #4 + lsr lr, r2, #4 + orr r2, r12, r2, lsl #28 + orr r3, lr, r3, lsl #28 + # s[1] <<< 20 + lsr r12, r5, #12 + lsr lr, r4, #12 + orr r4, r12, r4, lsl #20 + orr r5, lr, r5, lsl #20 + # s[2] <<< 3 + lsr r12, r7, #29 + lsr lr, r6, #29 + orr r6, r12, r6, lsl #3 + orr r7, lr, r7, lsl #3 + # s[3] <<< 45 + mov lr, r8 + lsr r12, r9, #19 + lsr r8, r8, #19 + orr r8, r8, r9, lsl #13 + orr r9, r12, lr, lsl #13 + # s[4] <<< 61 + mov lr, r10 + lsr r12, r11, #3 + lsr r10, r10, #3 + orr r10, r10, r11, lsl #29 + orr r11, r12, lr, lsl #29 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [r0, #48] + str lr, [r0, #52] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [r0, #56] + str lr, [r0, #60] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [r0, #64] + str lr, [r0, #68] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [r0, #72] + str lr, [r0, #76] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [r0, #40] + str lr, [r0, #44] + # Row 2 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp, #8] + ldr r3, [sp, #16] +#else + ldrd r2, r3, [sp, #8] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #56] + ldr r5, [sp, #64] +#else + ldrd r4, r5, [sp, #56] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr 
r6, [sp, #104] + ldr r7, [sp, #112] +#else + ldrd r6, r7, [sp, #104] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #152] + ldr r9, [sp, #160] +#else + ldrd r8, r9, [sp, #152] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #160] + ldr r11, [sp, #168] +#else + ldrd r10, r11, [sp, #160] +#endif + # s[0] <<< 1 + lsr r12, r3, #31 + lsr lr, r2, #31 + orr r2, r12, r2, lsl #1 + orr r3, lr, r3, lsl #1 + # s[1] <<< 6 + lsr r12, r5, #26 + lsr lr, r4, #26 + orr r4, r12, r4, lsl #6 + orr r5, lr, r5, lsl #6 + # s[2] <<< 25 + lsr r12, r7, #7 + lsr lr, r6, #7 + orr r6, r12, r6, lsl #25 + orr r7, lr, r7, lsl #25 + # s[3] <<< 8 + lsr r12, r9, #24 + lsr lr, r8, #24 + orr r8, r12, r8, lsl #8 + orr r9, lr, r9, lsl #8 + # s[4] <<< 18 + lsr r12, r11, #14 + lsr lr, r10, #14 + orr r10, r12, r10, lsl #18 + orr r11, lr, r11, lsl #18 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [r0, #88] + str lr, [r0, #92] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [r0, #96] + str lr, [r0, #100] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [r0, #104] + str lr, [r0, #108] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [r0, #112] + str lr, [r0, #116] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [r0, #80] + str lr, [r0, #84] + # Row 3 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp, #32] + ldr r3, [sp, #40] +#else + ldrd r2, r3, [sp, #32] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #40] + ldr r5, [sp, #48] +#else + ldrd r4, r5, [sp, #40] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #88] + ldr r7, [sp, #96] +#else + ldrd r6, r7, [sp, #88] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #136] + ldr r9, [sp, #144] +#else + ldrd r8, r9, [sp, #136] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #184] + ldr r11, [sp, #192] +#else + ldrd r10, r11, [sp, #184] +#endif + # s[0] <<< 27 + lsr r12, r3, #5 + lsr lr, r2, #5 + orr r2, r12, r2, lsl #27 + orr r3, lr, r3, lsl #27 + # s[1] <<< 36 + mov lr, r4 + lsr r12, r5, #28 + lsr r4, r4, #28 + orr r4, r4, r5, lsl #4 + orr r5, r12, lr, lsl #4 + # s[2] <<< 10 + lsr r12, r7, #22 + lsr lr, r6, #22 + orr r6, r12, r6, lsl #10 + orr r7, lr, r7, lsl #10 + # s[3] <<< 15 + lsr r12, r9, #17 + lsr lr, r8, #17 + orr r8, r12, r8, lsl #15 + orr r9, lr, r9, lsl #15 + # s[4] <<< 56 + mov lr, r10 + lsr r12, r11, #8 + lsr r10, r10, #8 + orr r10, r10, r11, lsl #24 + orr r11, r12, lr, lsl #24 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [r0, #128] + str lr, [r0, #132] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [r0, #136] + str lr, [r0, #140] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [r0, #144] + str lr, [r0, #148] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [r0, #152] + str lr, [r0, #156] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [r0, #120] + str lr, [r0, #124] + # Row 4 +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r2, [sp, #16] + ldr r3, [sp, #24] +#else + ldrd r2, r3, [sp, #16] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r4, [sp, #64] + ldr r5, [sp, #72] +#else + ldrd 
r4, r5, [sp, #64] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r6, [sp, #112] + ldr r7, [sp, #120] +#else + ldrd r6, r7, [sp, #112] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r8, [sp, #120] + ldr r9, [sp, #128] +#else + ldrd r8, r9, [sp, #120] +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + ldr r10, [sp, #168] + ldr r11, [sp, #176] +#else + ldrd r10, r11, [sp, #168] +#endif + # s[0] <<< 62 + mov lr, r2 + lsr r12, r3, #2 + lsr r2, r2, #2 + orr r2, r2, r3, lsl #30 + orr r3, r12, lr, lsl #30 + # s[1] <<< 55 + mov lr, r4 + lsr r12, r5, #9 + lsr r4, r4, #9 + orr r4, r4, r5, lsl #23 + orr r5, r12, lr, lsl #23 + # s[2] <<< 39 + mov lr, r6 + lsr r12, r7, #25 + lsr r6, r6, #25 + orr r6, r6, r7, lsl #7 + orr r7, r12, lr, lsl #7 + # s[3] <<< 41 + mov lr, r8 + lsr r12, r9, #23 + lsr r8, r8, #23 + orr r8, r8, r9, lsl #9 + orr r9, r12, lr, lsl #9 + # s[4] <<< 2 + lsr r12, r11, #30 + lsr lr, r10, #30 + orr r10, r12, r10, lsl #2 + orr r11, lr, r11, lsl #2 + bic r12, r8, r6 + bic lr, r9, r7 + eor r12, r12, r4 + eor lr, lr, r5 + str r12, [r0, #168] + str lr, [r0, #172] + bic r12, r10, r8 + bic lr, r11, r9 + eor r12, r12, r6 + eor lr, lr, r7 + str r12, [r0, #176] + str lr, [r0, #180] + bic r12, r2, r10 + bic lr, r3, r11 + eor r12, r12, r8 + eor lr, lr, r9 + str r12, [r0, #184] + str lr, [r0, #188] + bic r12, r4, r2 + bic lr, r5, r3 + eor r12, r12, r10 + eor lr, lr, r11 + str r12, [r0, #192] + str lr, [r0, #196] + bic r12, r6, r4 + bic lr, r7, r5 + eor r12, r12, r2 + eor lr, lr, r3 + str r12, [r0, #160] + str lr, [r0, #164] + ldr r2, [sp, #200] + subs r2, r2, #1 + bne L_sha3_arm32_begin + add sp, sp, #0xcc + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} + .size BlockSha3,.-BlockSha3 +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c new file mode 100644 index 0000000000..388ec457a8 --- /dev/null +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c @@ -0,0 +1,2356 @@ +/* armv8-32-sha3-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha3/sha3.rb arm32 ../wolfssl/wolfcrypt/src/port/arm/armv8-32-sha3-asm.c + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <wolfssl/wolfcrypt/settings.h> +#include <wolfssl/wolfcrypt/error-crypt.h> + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) +#include <stdint.h> +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <wolfssl/wolfcrypt/settings.h> +#include <wolfssl/wolfcrypt/error-crypt.h> +#ifdef WOLFSSL_ARMASM_INLINE + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) + +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif /* __KEIL__ */ +static const uint64_t L_sha3_arm2_neon_rt[] = { + 0x0000000000000001UL, 0x0000000000008082UL, + 0x800000000000808aUL, 0x8000000080008000UL, + 0x000000000000808bUL, 0x0000000080000001UL, + 0x8000000080008081UL, 0x8000000000008009UL, + 0x000000000000008aUL, 0x0000000000000088UL, + 0x0000000080008009UL, 0x000000008000000aUL, + 0x000000008000808bUL, 0x800000000000008bUL, + 0x8000000000008089UL, 0x8000000000008003UL, + 0x8000000000008002UL, 0x8000000000000080UL, + 0x000000000000800aUL, 0x800000008000000aUL, + 0x8000000080008081UL, 0x8000000000008080UL, + 0x0000000080000001UL, 0x8000000080008008UL, +}; + +static const uint64_t L_sha3_arm2_rt[] = { + 0x0000000000000001UL, 0x0000000000008082UL, + 0x800000000000808aUL, 0x8000000080008000UL, + 0x000000000000808bUL, 0x0000000080000001UL, + 0x8000000080008081UL, 0x8000000000008009UL, + 0x000000000000008aUL, 0x0000000000000088UL, + 0x0000000080008009UL, 0x000000008000000aUL, + 0x000000008000808bUL, 0x800000000000008bUL, + 0x8000000000008089UL, 0x8000000000008003UL, + 0x8000000000008002UL, 0x8000000000000080UL, + 0x000000000000800aUL, 0x800000008000000aUL, + 0x8000000080008081UL, 0x8000000000008080UL, + 0x0000000080000001UL, 0x8000000080008008UL, +}; + +#include <wolfssl/wolfcrypt/sha3.h> + +#ifndef WOLFSSL_ARMASM_NO_NEON +void BlockSha3(word64* state_p) +{ + register word64* state asm ("r0") = (word64*)state_p; + register uint64_t* L_sha3_arm2_neon_rt_c asm ("r1") = (uint64_t*)&L_sha3_arm2_neon_rt; + register uint64_t* L_sha3_arm2_rt_c asm ("r2") = (uint64_t*)&L_sha3_arm2_rt; + + __asm__ __volatile__ ( + "sub sp, sp, #16\n\t" + "mov r2, #24\n\t" + "mov r3, sp\n\t" + "vld1.8 {d0-d3}, [%[state]]!\n\t" + "vld1.8 {d4-d7}, [%[state]]!\n\t" + "vld1.8 {d8-d11}, [%[state]]!\n\t" + "vld1.8 {d12-d15}, [%[state]]!\n\t" + "vld1.8 {d16-d19}, [%[state]]!\n\t" + "vld1.8 {d20-d23}, [%[state]]!\n\t" + "vld1.8 {d24}, [%[state]]\n\t" + "sub %[state], %[state], #0xc0\n\t" + "\n" + "L_sha3_arm32_neon_begin_%=: \n\t" + /* Calc b[0..4] */ + "veor d26, d0, d5\n\t" + "veor d27, d1, d6\n\t" + "veor d28, d2, d7\n\t" + "veor d29, d3, d8\n\t" + "veor d25, d4, d9\n\t" + "veor d26, d26, d10\n\t" + "veor d27, d27, d11\n\t" + "veor d28, d28, d12\n\t" + "veor d29, d29, d13\n\t" + "veor d25, d25, d14\n\t" + "veor d26, d26, d15\n\t" + "veor d27, d27, d16\n\t" + "veor d28, d28, d17\n\t" + "veor d29, d29, d18\n\t" + "veor d25, d25, d19\n\t" + "veor d26, d26, d20\n\t" + "veor d27, d27, d21\n\t" + "veor d28, d28, d22\n\t" + "veor d29, d29, d23\n\t" + "veor d25, d25, d24\n\t" + "vst1.8 {d25-d26}, [r3]\n\t" + /* Calc t[0..4] and XOR into s[i*5..i*5+4] */ + /* t[0]
*/ + "vshr.u64 d30, d27, #63\n\t" + "vshl.u64 d31, d27, #1\n\t" + "veor d25, d25, d30\n\t" + "veor d25, d25, d31\n\t" + /* t[1] */ + "vshr.u64 d30, d28, #63\n\t" + "vshl.u64 d31, d28, #1\n\t" + "veor d26, d26, d30\n\t" + "veor d26, d26, d31\n\t" + /* t[2] */ + "vshr.u64 d30, d29, #63\n\t" + "vshl.u64 d31, d29, #1\n\t" + "veor d27, d27, d30\n\t" + "veor d27, d27, d31\n\t" + /* t[3] */ + "vldr.8 d31, [r3]\n\t" + "vshr.u64 d30, d31, #63\n\t" + "vshl.u64 d31, d31, #1\n\t" + "veor d28, d28, d30\n\t" + "veor d28, d28, d31\n\t" + /* t[4] */ + "vldr.8 d31, [r3, #8]\n\t" + "vshr.u64 d30, d31, #63\n\t" + "vshl.u64 d31, d31, #1\n\t" + "veor d29, d29, d30\n\t" + "veor d29, d29, d31\n\t" + "sub r3, r3, #16\n\t" + "veor d0, d0, d25\n\t" + /* s[1] => s[10] (tmp) */ + "veor d30, d1, d26\n\t" + "vshr.u64 d31, d30, #63\n\t" + "vshl.u64 d30, d30, #1\n\t" + "veor d30, d30, d31\n\t" + /* s[6] => s[1] */ + "veor d1, d6, d26\n\t" + "vshr.u64 d31, d1, #20\n\t" + "vshl.u64 d1, d1, #44\n\t" + "veor d1, d1, d31\n\t" + /* s[9] => s[6] */ + "veor d6, d9, d29\n\t" + "vshr.u64 d31, d6, #44\n\t" + "vshl.u64 d6, d6, #20\n\t" + "veor d6, d6, d31\n\t" + /* s[22] => s[9] */ + "veor d9, d22, d27\n\t" + "vshr.u64 d31, d9, #3\n\t" + "vshl.u64 d9, d9, #61\n\t" + "veor d9, d9, d31\n\t" + /* s[14] => s[22] */ + "veor d22, d14, d29\n\t" + "vshr.u64 d31, d22, #25\n\t" + "vshl.u64 d22, d22, #39\n\t" + "veor d22, d22, d31\n\t" + /* s[20] => s[14] */ + "veor d14, d20, d25\n\t" + "vshr.u64 d31, d14, #46\n\t" + "vshl.u64 d14, d14, #18\n\t" + "veor d14, d14, d31\n\t" + /* s[2] => s[20] */ + "veor d20, d2, d27\n\t" + "vshr.u64 d31, d20, #2\n\t" + "vshl.u64 d20, d20, #62\n\t" + "veor d20, d20, d31\n\t" + /* s[12] => s[2] */ + "veor d2, d12, d27\n\t" + "vshr.u64 d31, d2, #21\n\t" + "vshl.u64 d2, d2, #43\n\t" + "veor d2, d2, d31\n\t" + /* s[13] => s[12] */ + "veor d12, d13, d28\n\t" + "vshr.u64 d31, d12, #39\n\t" + "vshl.u64 d12, d12, #25\n\t" + "veor d12, d12, d31\n\t" + /* s[19] => s[13] */ + "veor d13, d19, d29\n\t" + "vshr.u64 d31, d13, #56\n\t" + "vshl.u64 d13, d13, #8\n\t" + "veor d13, d13, d31\n\t" + /* s[23] => s[19] */ + "veor d19, d23, d28\n\t" + "vshr.u64 d31, d19, #8\n\t" + "vshl.u64 d19, d19, #56\n\t" + "veor d19, d19, d31\n\t" + /* s[15] => s[23] */ + "veor d23, d15, d25\n\t" + "vshr.u64 d31, d23, #23\n\t" + "vshl.u64 d23, d23, #41\n\t" + "veor d23, d23, d31\n\t" + /* s[4] => s[15] */ + "veor d15, d4, d29\n\t" + "vshr.u64 d31, d15, #37\n\t" + "vshl.u64 d15, d15, #27\n\t" + "veor d15, d15, d31\n\t" + /* s[24] => s[4] */ + "veor d4, d24, d29\n\t" + "vshr.u64 d31, d4, #50\n\t" + "vshl.u64 d4, d4, #14\n\t" + "veor d4, d4, d31\n\t" + /* s[21] => s[24] */ + "veor d24, d21, d26\n\t" + "vshr.u64 d31, d24, #62\n\t" + "vshl.u64 d24, d24, #2\n\t" + "veor d24, d24, d31\n\t" + /* s[8] => s[21] */ + "veor d21, d8, d28\n\t" + "vshr.u64 d31, d21, #9\n\t" + "vshl.u64 d21, d21, #55\n\t" + "veor d21, d21, d31\n\t" + /* s[16] => s[8] */ + "veor d8, d16, d26\n\t" + "vshr.u64 d31, d8, #19\n\t" + "vshl.u64 d8, d8, #45\n\t" + "veor d8, d8, d31\n\t" + /* s[5] => s[16] */ + "veor d16, d5, d25\n\t" + "vshr.u64 d31, d16, #28\n\t" + "vshl.u64 d16, d16, #36\n\t" + "veor d16, d16, d31\n\t" + /* s[3] => s[5] */ + "veor d5, d3, d28\n\t" + "vshr.u64 d31, d5, #36\n\t" + "vshl.u64 d5, d5, #28\n\t" + "veor d5, d5, d31\n\t" + /* s[18] => s[3] */ + "veor d3, d18, d28\n\t" + "vshr.u64 d31, d3, #43\n\t" + "vshl.u64 d3, d3, #21\n\t" + "veor d3, d3, d31\n\t" + /* s[17] => s[18] */ + "veor d18, d17, d27\n\t" + "vshr.u64 d31, d18, #49\n\t" + "vshl.u64 d18, d18, #15\n\t" + 
"veor d18, d18, d31\n\t" + /* s[11] => s[17] */ + "veor d17, d11, d26\n\t" + "vshr.u64 d31, d17, #54\n\t" + "vshl.u64 d17, d17, #10\n\t" + "veor d17, d17, d31\n\t" + /* s[7] => s[11] */ + "veor d11, d7, d27\n\t" + "vshr.u64 d31, d11, #58\n\t" + "vshl.u64 d11, d11, #6\n\t" + "veor d11, d11, d31\n\t" + /* s[10] => s[7] */ + "veor d7, d10, d25\n\t" + "vshr.u64 d31, d7, #61\n\t" + "vshl.u64 d7, d7, #3\n\t" + "veor d7, d7, d31\n\t" + /* Row Mix */ + "vmov d25, d0\n\t" + "vmov d26, d1\n\t" + "vbic d31, d2, d26\n\t" + "veor d0, d25, d31\n\t" + "vbic d31, d3, d2\n\t" + "veor d1, d26, d31\n\t" + "vbic d31, d4, d3\n\t" + "veor d2, d2, d31\n\t" + "vbic d31, d25, d4\n\t" + "veor d3, d3, d31\n\t" + "vbic d31, d26, d25\n\t" + "veor d4, d4, d31\n\t" + "vmov d25, d5\n\t" + "vmov d26, d6\n\t" + "vbic d31, d7, d26\n\t" + "veor d5, d25, d31\n\t" + "vbic d31, d8, d7\n\t" + "veor d6, d26, d31\n\t" + "vbic d31, d9, d8\n\t" + "veor d7, d7, d31\n\t" + "vbic d31, d25, d9\n\t" + "veor d8, d8, d31\n\t" + "vbic d31, d26, d25\n\t" + "veor d9, d9, d31\n\t" + "vmov d26, d11\n\t" + "vbic d31, d12, d26\n\t" + "veor d10, d30, d31\n\t" + "vbic d31, d13, d12\n\t" + "veor d11, d26, d31\n\t" + "vbic d31, d14, d13\n\t" + "veor d12, d12, d31\n\t" + "vbic d31, d30, d14\n\t" + "veor d13, d13, d31\n\t" + "vbic d31, d26, d30\n\t" + "veor d14, d14, d31\n\t" + "vmov d25, d15\n\t" + "vmov d26, d16\n\t" + "vbic d31, d17, d26\n\t" + "veor d15, d25, d31\n\t" + "vbic d31, d18, d17\n\t" + "veor d16, d26, d31\n\t" + "vbic d31, d19, d18\n\t" + "veor d17, d17, d31\n\t" + "vbic d31, d25, d19\n\t" + "veor d18, d18, d31\n\t" + "vbic d31, d26, d25\n\t" + "veor d19, d19, d31\n\t" + "vmov d25, d20\n\t" + "vmov d26, d21\n\t" + "vbic d31, d22, d26\n\t" + "veor d20, d25, d31\n\t" + "vbic d31, d23, d22\n\t" + "veor d21, d26, d31\n\t" + "vbic d31, d24, d23\n\t" + "veor d22, d22, d31\n\t" + "vbic d31, d25, d24\n\t" + "veor d23, d23, d31\n\t" + "vbic d31, d26, d25\n\t" + "veor d24, d24, d31\n\t" + "vld1.8 {d30}, [r1]!\n\t" + "subs r2, r2, #1\n\t" + "veor d0, d0, d30\n\t" + "bne L_sha3_arm32_neon_begin_%=\n\t" + "vst1.8 {d0-d3}, [%[state]]!\n\t" + "vst1.8 {d4-d7}, [%[state]]!\n\t" + "vst1.8 {d8-d11}, [%[state]]!\n\t" + "vst1.8 {d12-d15}, [%[state]]!\n\t" + "vst1.8 {d16-d19}, [%[state]]!\n\t" + "vst1.8 {d20-d23}, [%[state]]!\n\t" + "vst1.8 {d24}, [%[state]]\n\t" + "add sp, sp, #16\n\t" + : [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c) + : + : "memory", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31", "cc" + ); +} + +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#include + +#ifdef WOLFSSL_ARMASM_NO_NEON +void BlockSha3(word64* state_p) +{ + register word64* state asm ("r0") = (word64*)state_p; + register uint64_t* L_sha3_arm2_neon_rt_c asm ("r1") = (uint64_t*)&L_sha3_arm2_neon_rt; + register uint64_t* L_sha3_arm2_rt_c asm ("r2") = (uint64_t*)&L_sha3_arm2_rt; + + __asm__ __volatile__ ( + "sub sp, sp, #0xcc\n\t" + "mov r1, %[L_sha3_arm2_rt]\n\t" + "mov r2, #12\n\t" + "\n" + "L_sha3_arm32_begin_%=: \n\t" + "str r2, [sp, #200]\n\t" + /* Round even */ + /* Calc b[4] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #32]\n\t" + "ldr r5, [%[state], #40]\n\t" +#else + "ldrd r4, r5, [%[state], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #72]\n\t" + "ldr r7, 
[%[state], #80]\n\t" +#else + "ldrd r6, r7, [%[state], #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #112]\n\t" + "ldr r9, [%[state], #120]\n\t" +#else + "ldrd r8, r9, [%[state], #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #152]\n\t" + "ldr r11, [%[state], #160]\n\t" +#else + "ldrd r10, r11, [%[state], #152]\n\t" +#endif + "ldr r12, [%[state], #192]\n\t" + "ldr lr, [%[state], #196]\n\t" + "eor r2, r4, r6\n\t" + "eor r3, r5, r7\n\t" + "eor r2, r2, r8\n\t" + "eor r3, r3, r9\n\t" + "eor r2, r2, r10\n\t" + "eor r3, r3, r11\n\t" + "eor r2, r2, r12\n\t" + "eor r3, r3, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r2, [sp, #32]\n\t" + "str r3, [sp, #40]\n\t" +#else + "strd r2, r3, [sp, #32]\n\t" +#endif + /* Calc b[1] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #8]\n\t" + "ldr r5, [%[state], #16]\n\t" +#else + "ldrd r4, r5, [%[state], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #48]\n\t" + "ldr r7, [%[state], #56]\n\t" +#else + "ldrd r6, r7, [%[state], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #88]\n\t" + "ldr r9, [%[state], #96]\n\t" +#else + "ldrd r8, r9, [%[state], #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #128]\n\t" + "ldr r11, [%[state], #136]\n\t" +#else + "ldrd r10, r11, [%[state], #128]\n\t" +#endif + "ldr r12, [%[state], #168]\n\t" + "ldr lr, [%[state], #172]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r4, r4, r10\n\t" + "eor r5, r5, r11\n\t" + "eor r4, r4, r12\n\t" + "eor r5, r5, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #16]\n\t" +#else + "strd r4, r5, [sp, #8]\n\t" +#endif + /* Calc t[0] */ + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* Calc b[0] and XOR t[0] into s[x*5+0] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state]]\n\t" + "ldr r5, [%[state], #8]\n\t" +#else + "ldrd r4, r5, [%[state]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #40]\n\t" + "ldr r7, [%[state], #48]\n\t" +#else + "ldrd r6, r7, [%[state], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #80]\n\t" + "ldr r9, [%[state], #88]\n\t" +#else + "ldrd r8, r9, [%[state], #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #120]\n\t" + "ldr r11, [%[state], #128]\n\t" +#else + "ldrd r10, r11, [%[state], #120]\n\t" +#endif + "eor r12, r4, r6\n\t" + "eor lr, r5, r7\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[state]]\n\t" + "str r5, [%[state], #8]\n\t" +#else + "strd r4, r5, [%[state]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[state], #40]\n\t" + "str r7, [%[state], #48]\n\t" +#else + "strd r6, r7, [%[state], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH < 7) + "str r8, [%[state], #80]\n\t" + "str r9, [%[state], #88]\n\t" +#else + "strd r8, r9, [%[state], #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[state], #120]\n\t" + "str r11, [%[state], #128]\n\t" +#else + "strd r10, r11, [%[state], #120]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #160]\n\t" + "ldr r11, [%[state], #168]\n\t" +#else + "ldrd r10, r11, [%[state], #160]\n\t" +#endif + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[state], #160]\n\t" + "str r11, [%[state], #168]\n\t" +#else + "strd r10, r11, [%[state], #160]\n\t" +#endif + "str r12, [sp]\n\t" + "str lr, [sp, #4]\n\t" + /* Calc b[3] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #24]\n\t" + "ldr r5, [%[state], #32]\n\t" +#else + "ldrd r4, r5, [%[state], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #64]\n\t" + "ldr r7, [%[state], #72]\n\t" +#else + "ldrd r6, r7, [%[state], #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #104]\n\t" + "ldr r9, [%[state], #112]\n\t" +#else + "ldrd r8, r9, [%[state], #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #144]\n\t" + "ldr r11, [%[state], #152]\n\t" +#else + "ldrd r10, r11, [%[state], #144]\n\t" +#endif + "ldr r12, [%[state], #184]\n\t" + "ldr lr, [%[state], #188]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r4, r4, r10\n\t" + "eor r5, r5, r11\n\t" + "eor r4, r4, r12\n\t" + "eor r5, r5, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #24]\n\t" + "str r5, [sp, #32]\n\t" +#else + "strd r4, r5, [sp, #24]\n\t" +#endif + /* Calc t[2] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp, #8]\n\t" + "ldr r3, [sp, #16]\n\t" +#else + "ldrd r2, r3, [sp, #8]\n\t" +#endif + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* Calc b[2] and XOR t[2] into s[x*5+2] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #16]\n\t" + "ldr r5, [%[state], #24]\n\t" +#else + "ldrd r4, r5, [%[state], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #56]\n\t" + "ldr r7, [%[state], #64]\n\t" +#else + "ldrd r6, r7, [%[state], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #96]\n\t" + "ldr r9, [%[state], #104]\n\t" +#else + "ldrd r8, r9, [%[state], #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #136]\n\t" + "ldr r11, [%[state], #144]\n\t" +#else + "ldrd r10, r11, [%[state], #136]\n\t" +#endif + "eor r12, r4, r6\n\t" + "eor lr, r5, r7\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[state], #16]\n\t" + "str r5, [%[state], #24]\n\t" +#else + "strd r4, r5, [%[state], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH 
< 7) + "str r6, [%[state], #56]\n\t" + "str r7, [%[state], #64]\n\t" +#else + "strd r6, r7, [%[state], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[state], #96]\n\t" + "str r9, [%[state], #104]\n\t" +#else + "strd r8, r9, [%[state], #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[state], #136]\n\t" + "str r11, [%[state], #144]\n\t" +#else + "strd r10, r11, [%[state], #136]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #176]\n\t" + "ldr r11, [%[state], #184]\n\t" +#else + "ldrd r10, r11, [%[state], #176]\n\t" +#endif + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[state], #176]\n\t" + "str r11, [%[state], #184]\n\t" +#else + "strd r10, r11, [%[state], #176]\n\t" +#endif + "str r12, [sp, #16]\n\t" + "str lr, [sp, #20]\n\t" + /* Calc t[1] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp]\n\t" + "ldr r3, [sp, #8]\n\t" +#else + "ldrd r2, r3, [sp]\n\t" +#endif + "eor r2, r2, lr, lsr #31\n\t" + "eor r3, r3, r12, lsr #31\n\t" + "eor r2, r2, r12, lsl #1\n\t" + "eor r3, r3, lr, lsl #1\n\t" + /* XOR t[1] into s[x*5+1] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #8]\n\t" + "ldr r5, [%[state], #16]\n\t" +#else + "ldrd r4, r5, [%[state], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #48]\n\t" + "ldr r7, [%[state], #56]\n\t" +#else + "ldrd r6, r7, [%[state], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #88]\n\t" + "ldr r9, [%[state], #96]\n\t" +#else + "ldrd r8, r9, [%[state], #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #128]\n\t" + "ldr r11, [%[state], #136]\n\t" +#else + "ldrd r10, r11, [%[state], #128]\n\t" +#endif + "ldr r12, [%[state], #168]\n\t" + "ldr lr, [%[state], #172]\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[state], #8]\n\t" + "str r5, [%[state], #16]\n\t" +#else + "strd r4, r5, [%[state], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[state], #48]\n\t" + "str r7, [%[state], #56]\n\t" +#else + "strd r6, r7, [%[state], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[state], #88]\n\t" + "str r9, [%[state], #96]\n\t" +#else + "strd r8, r9, [%[state], #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[state], #128]\n\t" + "str r11, [%[state], #136]\n\t" +#else + "strd r10, r11, [%[state], #128]\n\t" +#endif + "str r12, [%[state], #168]\n\t" + "str lr, [%[state], #172]\n\t" + /* Calc t[3] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #24]\n\t" +#else + "ldrd r2, r3, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #40]\n\t" +#else + "ldrd r4, r5, [sp, #32]\n\t" +#endif + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* XOR t[3] into s[x*5+3] 
*/ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #24]\n\t" + "ldr r5, [%[state], #32]\n\t" +#else + "ldrd r4, r5, [%[state], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #64]\n\t" + "ldr r7, [%[state], #72]\n\t" +#else + "ldrd r6, r7, [%[state], #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #104]\n\t" + "ldr r9, [%[state], #112]\n\t" +#else + "ldrd r8, r9, [%[state], #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #144]\n\t" + "ldr r11, [%[state], #152]\n\t" +#else + "ldrd r10, r11, [%[state], #144]\n\t" +#endif + "ldr r12, [%[state], #184]\n\t" + "ldr lr, [%[state], #188]\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[state], #24]\n\t" + "str r5, [%[state], #32]\n\t" +#else + "strd r4, r5, [%[state], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[state], #64]\n\t" + "str r7, [%[state], #72]\n\t" +#else + "strd r6, r7, [%[state], #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[state], #104]\n\t" + "str r9, [%[state], #112]\n\t" +#else + "strd r8, r9, [%[state], #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[state], #144]\n\t" + "str r11, [%[state], #152]\n\t" +#else + "strd r10, r11, [%[state], #144]\n\t" +#endif + "str r12, [%[state], #184]\n\t" + "str lr, [%[state], #188]\n\t" + /* Calc t[4] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp, #24]\n\t" + "ldr r3, [sp, #32]\n\t" +#else + "ldrd r2, r3, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #8]\n\t" +#else + "ldrd r4, r5, [sp]\n\t" +#endif + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* XOR t[4] into s[x*5+4] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #32]\n\t" + "ldr r5, [%[state], #40]\n\t" +#else + "ldrd r4, r5, [%[state], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #72]\n\t" + "ldr r7, [%[state], #80]\n\t" +#else + "ldrd r6, r7, [%[state], #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #112]\n\t" + "ldr r9, [%[state], #120]\n\t" +#else + "ldrd r8, r9, [%[state], #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #152]\n\t" + "ldr r11, [%[state], #160]\n\t" +#else + "ldrd r10, r11, [%[state], #152]\n\t" +#endif + "ldr r12, [%[state], #192]\n\t" + "ldr lr, [%[state], #196]\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[state], #32]\n\t" + "str r5, [%[state], #40]\n\t" +#else + "strd r4, r5, [%[state], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [%[state], #72]\n\t" + "str r7, [%[state], #80]\n\t" 
+#else + "strd r6, r7, [%[state], #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [%[state], #112]\n\t" + "str r9, [%[state], #120]\n\t" +#else + "strd r8, r9, [%[state], #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [%[state], #152]\n\t" + "str r11, [%[state], #160]\n\t" +#else + "strd r10, r11, [%[state], #152]\n\t" +#endif + "str r12, [%[state], #192]\n\t" + "str lr, [%[state], #196]\n\t" + /* Row Mix */ + /* Row 0 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state]]\n\t" + "ldr r3, [%[state], #8]\n\t" +#else + "ldrd r2, r3, [%[state]]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #48]\n\t" + "ldr r5, [%[state], #56]\n\t" +#else + "ldrd r4, r5, [%[state], #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #96]\n\t" + "ldr r7, [%[state], #104]\n\t" +#else + "ldrd r6, r7, [%[state], #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #144]\n\t" + "ldr r9, [%[state], #152]\n\t" +#else + "ldrd r8, r9, [%[state], #144]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #192]\n\t" + "ldr r11, [%[state], #200]\n\t" +#else + "ldrd r10, r11, [%[state], #192]\n\t" +#endif + /* s[1] <<< 44 */ + "mov lr, r4\n\t" + "lsr r12, r5, #20\n\t" + "lsr r4, r4, #20\n\t" + "orr r4, r4, r5, lsl #12\n\t" + "orr r5, r12, lr, lsl #12\n\t" + /* s[2] <<< 43 */ + "mov lr, r6\n\t" + "lsr r12, r7, #21\n\t" + "lsr r6, r6, #21\n\t" + "orr r6, r6, r7, lsl #11\n\t" + "orr r7, r12, lr, lsl #11\n\t" + /* s[3] <<< 21 */ + "lsr r12, r9, #11\n\t" + "lsr lr, r8, #11\n\t" + "orr r8, r12, r8, lsl #21\n\t" + "orr r9, lr, r9, lsl #21\n\t" + /* s[4] <<< 14 */ + "lsr r12, r11, #18\n\t" + "lsr lr, r10, #18\n\t" + "orr r10, r12, r10, lsl #14\n\t" + "orr r11, lr, r11, lsl #14\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [sp, #8]\n\t" + "str lr, [sp, #12]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [sp, #16]\n\t" + "str lr, [sp, #20]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [sp, #24]\n\t" + "str lr, [sp, #28]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [sp, #32]\n\t" + "str lr, [sp, #36]\n\t" + /* Get constant */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [r1]\n\t" + "ldr r11, [r1, #4]\n\t" +#else + "ldrd r10, r11, [r1]\n\t" +#endif + "add r1, r1, #8\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + /* XOR in constant */ + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [sp]\n\t" + "str lr, [sp, #4]\n\t" + /* Row 1 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state], #24]\n\t" + "ldr r3, [%[state], #32]\n\t" +#else + "ldrd r2, r3, [%[state], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #72]\n\t" + "ldr r5, [%[state], #80]\n\t" +#else + "ldrd r4, r5, [%[state], #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #80]\n\t" + "ldr r7, [%[state], #88]\n\t" +#else + "ldrd r6, r7, [%[state], #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr 
r8, [%[state], #128]\n\t" + "ldr r9, [%[state], #136]\n\t" +#else + "ldrd r8, r9, [%[state], #128]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #176]\n\t" + "ldr r11, [%[state], #184]\n\t" +#else + "ldrd r10, r11, [%[state], #176]\n\t" +#endif + /* s[0] <<< 28 */ + "lsr r12, r3, #4\n\t" + "lsr lr, r2, #4\n\t" + "orr r2, r12, r2, lsl #28\n\t" + "orr r3, lr, r3, lsl #28\n\t" + /* s[1] <<< 20 */ + "lsr r12, r5, #12\n\t" + "lsr lr, r4, #12\n\t" + "orr r4, r12, r4, lsl #20\n\t" + "orr r5, lr, r5, lsl #20\n\t" + /* s[2] <<< 3 */ + "lsr r12, r7, #29\n\t" + "lsr lr, r6, #29\n\t" + "orr r6, r12, r6, lsl #3\n\t" + "orr r7, lr, r7, lsl #3\n\t" + /* s[3] <<< 45 */ + "mov lr, r8\n\t" + "lsr r12, r9, #19\n\t" + "lsr r8, r8, #19\n\t" + "orr r8, r8, r9, lsl #13\n\t" + "orr r9, r12, lr, lsl #13\n\t" + /* s[4] <<< 61 */ + "mov lr, r10\n\t" + "lsr r12, r11, #3\n\t" + "lsr r10, r10, #3\n\t" + "orr r10, r10, r11, lsl #29\n\t" + "orr r11, r12, lr, lsl #29\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [sp, #48]\n\t" + "str lr, [sp, #52]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [sp, #56]\n\t" + "str lr, [sp, #60]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [sp, #64]\n\t" + "str lr, [sp, #68]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [sp, #72]\n\t" + "str lr, [sp, #76]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [sp, #40]\n\t" + "str lr, [sp, #44]\n\t" + /* Row 2 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state], #8]\n\t" + "ldr r3, [%[state], #16]\n\t" +#else + "ldrd r2, r3, [%[state], #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #56]\n\t" + "ldr r5, [%[state], #64]\n\t" +#else + "ldrd r4, r5, [%[state], #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #104]\n\t" + "ldr r7, [%[state], #112]\n\t" +#else + "ldrd r6, r7, [%[state], #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #152]\n\t" + "ldr r9, [%[state], #160]\n\t" +#else + "ldrd r8, r9, [%[state], #152]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #160]\n\t" + "ldr r11, [%[state], #168]\n\t" +#else + "ldrd r10, r11, [%[state], #160]\n\t" +#endif + /* s[0] <<< 1 */ + "lsr r12, r3, #31\n\t" + "lsr lr, r2, #31\n\t" + "orr r2, r12, r2, lsl #1\n\t" + "orr r3, lr, r3, lsl #1\n\t" + /* s[1] <<< 6 */ + "lsr r12, r5, #26\n\t" + "lsr lr, r4, #26\n\t" + "orr r4, r12, r4, lsl #6\n\t" + "orr r5, lr, r5, lsl #6\n\t" + /* s[2] <<< 25 */ + "lsr r12, r7, #7\n\t" + "lsr lr, r6, #7\n\t" + "orr r6, r12, r6, lsl #25\n\t" + "orr r7, lr, r7, lsl #25\n\t" + /* s[3] <<< 8 */ + "lsr r12, r9, #24\n\t" + "lsr lr, r8, #24\n\t" + "orr r8, r12, r8, lsl #8\n\t" + "orr r9, lr, r9, lsl #8\n\t" + /* s[4] <<< 18 */ + "lsr r12, r11, #14\n\t" + "lsr lr, r10, #14\n\t" + "orr r10, r12, r10, lsl #18\n\t" + "orr r11, lr, r11, lsl #18\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [sp, #88]\n\t" + "str lr, [sp, #92]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str 
r12, [sp, #96]\n\t" + "str lr, [sp, #100]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [sp, #104]\n\t" + "str lr, [sp, #108]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [sp, #112]\n\t" + "str lr, [sp, #116]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [sp, #80]\n\t" + "str lr, [sp, #84]\n\t" + /* Row 3 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state], #32]\n\t" + "ldr r3, [%[state], #40]\n\t" +#else + "ldrd r2, r3, [%[state], #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #40]\n\t" + "ldr r5, [%[state], #48]\n\t" +#else + "ldrd r4, r5, [%[state], #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #88]\n\t" + "ldr r7, [%[state], #96]\n\t" +#else + "ldrd r6, r7, [%[state], #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #136]\n\t" + "ldr r9, [%[state], #144]\n\t" +#else + "ldrd r8, r9, [%[state], #136]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #184]\n\t" + "ldr r11, [%[state], #192]\n\t" +#else + "ldrd r10, r11, [%[state], #184]\n\t" +#endif + /* s[0] <<< 27 */ + "lsr r12, r3, #5\n\t" + "lsr lr, r2, #5\n\t" + "orr r2, r12, r2, lsl #27\n\t" + "orr r3, lr, r3, lsl #27\n\t" + /* s[1] <<< 36 */ + "mov lr, r4\n\t" + "lsr r12, r5, #28\n\t" + "lsr r4, r4, #28\n\t" + "orr r4, r4, r5, lsl #4\n\t" + "orr r5, r12, lr, lsl #4\n\t" + /* s[2] <<< 10 */ + "lsr r12, r7, #22\n\t" + "lsr lr, r6, #22\n\t" + "orr r6, r12, r6, lsl #10\n\t" + "orr r7, lr, r7, lsl #10\n\t" + /* s[3] <<< 15 */ + "lsr r12, r9, #17\n\t" + "lsr lr, r8, #17\n\t" + "orr r8, r12, r8, lsl #15\n\t" + "orr r9, lr, r9, lsl #15\n\t" + /* s[4] <<< 56 */ + "mov lr, r10\n\t" + "lsr r12, r11, #8\n\t" + "lsr r10, r10, #8\n\t" + "orr r10, r10, r11, lsl #24\n\t" + "orr r11, r12, lr, lsl #24\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [sp, #128]\n\t" + "str lr, [sp, #132]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [sp, #136]\n\t" + "str lr, [sp, #140]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [sp, #144]\n\t" + "str lr, [sp, #148]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [sp, #152]\n\t" + "str lr, [sp, #156]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [sp, #120]\n\t" + "str lr, [sp, #124]\n\t" + /* Row 4 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state], #16]\n\t" + "ldr r3, [%[state], #24]\n\t" +#else + "ldrd r2, r3, [%[state], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #64]\n\t" + "ldr r5, [%[state], #72]\n\t" +#else + "ldrd r4, r5, [%[state], #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [%[state], #112]\n\t" + "ldr r7, [%[state], #120]\n\t" +#else + "ldrd r6, r7, [%[state], #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [%[state], #120]\n\t" + "ldr r9, [%[state], #128]\n\t" +#else + "ldrd r8, r9, [%[state], 
#120]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [%[state], #168]\n\t" + "ldr r11, [%[state], #176]\n\t" +#else + "ldrd r10, r11, [%[state], #168]\n\t" +#endif + /* s[0] <<< 62 */ + "mov lr, r2\n\t" + "lsr r12, r3, #2\n\t" + "lsr r2, r2, #2\n\t" + "orr r2, r2, r3, lsl #30\n\t" + "orr r3, r12, lr, lsl #30\n\t" + /* s[1] <<< 55 */ + "mov lr, r4\n\t" + "lsr r12, r5, #9\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "orr r5, r12, lr, lsl #23\n\t" + /* s[2] <<< 39 */ + "mov lr, r6\n\t" + "lsr r12, r7, #25\n\t" + "lsr r6, r6, #25\n\t" + "orr r6, r6, r7, lsl #7\n\t" + "orr r7, r12, lr, lsl #7\n\t" + /* s[3] <<< 41 */ + "mov lr, r8\n\t" + "lsr r12, r9, #23\n\t" + "lsr r8, r8, #23\n\t" + "orr r8, r8, r9, lsl #9\n\t" + "orr r9, r12, lr, lsl #9\n\t" + /* s[4] <<< 2 */ + "lsr r12, r11, #30\n\t" + "lsr lr, r10, #30\n\t" + "orr r10, r12, r10, lsl #2\n\t" + "orr r11, lr, r11, lsl #2\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [sp, #168]\n\t" + "str lr, [sp, #172]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [sp, #176]\n\t" + "str lr, [sp, #180]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [sp, #184]\n\t" + "str lr, [sp, #188]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [sp, #192]\n\t" + "str lr, [sp, #196]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [sp, #160]\n\t" + "str lr, [sp, #164]\n\t" + /* Round odd */ + /* Calc b[4] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #32]\n\t" + "ldr r5, [sp, #40]\n\t" +#else + "ldrd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #72]\n\t" + "ldr r7, [sp, #80]\n\t" +#else + "ldrd r6, r7, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #112]\n\t" + "ldr r9, [sp, #120]\n\t" +#else + "ldrd r8, r9, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #152]\n\t" + "ldr r11, [sp, #160]\n\t" +#else + "ldrd r10, r11, [sp, #152]\n\t" +#endif + "ldr r12, [sp, #192]\n\t" + "ldr lr, [sp, #196]\n\t" + "eor r2, r4, r6\n\t" + "eor r3, r5, r7\n\t" + "eor r2, r2, r8\n\t" + "eor r3, r3, r9\n\t" + "eor r2, r2, r10\n\t" + "eor r3, r3, r11\n\t" + "eor r2, r2, r12\n\t" + "eor r3, r3, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r2, [%[state], #32]\n\t" + "str r3, [%[state], #40]\n\t" +#else + "strd r2, r3, [%[state], #32]\n\t" +#endif + /* Calc b[1] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #16]\n\t" +#else + "ldrd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #56]\n\t" +#else + "ldrd r6, r7, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #88]\n\t" + "ldr r9, [sp, #96]\n\t" +#else + "ldrd r8, r9, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #128]\n\t" + "ldr r11, [sp, #136]\n\t" +#else + "ldrd r10, r11, [sp, #128]\n\t" +#endif + "ldr r12, [sp, #168]\n\t" + "ldr lr, [sp, #172]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r8\n\t" + "eor r5, 
r5, r9\n\t" + "eor r4, r4, r10\n\t" + "eor r5, r5, r11\n\t" + "eor r4, r4, r12\n\t" + "eor r5, r5, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[state], #8]\n\t" + "str r5, [%[state], #16]\n\t" +#else + "strd r4, r5, [%[state], #8]\n\t" +#endif + /* Calc t[0] */ + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* Calc b[0] and XOR t[0] into s[x*5+0] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp]\n\t" + "ldr r5, [sp, #8]\n\t" +#else + "ldrd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #40]\n\t" + "ldr r7, [sp, #48]\n\t" +#else + "ldrd r6, r7, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #80]\n\t" + "ldr r9, [sp, #88]\n\t" +#else + "ldrd r8, r9, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #120]\n\t" + "ldr r11, [sp, #128]\n\t" +#else + "ldrd r10, r11, [sp, #120]\n\t" +#endif + "eor r12, r4, r6\n\t" + "eor lr, r5, r7\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp]\n\t" + "str r5, [sp, #8]\n\t" +#else + "strd r4, r5, [sp]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #40]\n\t" + "str r7, [sp, #48]\n\t" +#else + "strd r6, r7, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #80]\n\t" + "str r9, [sp, #88]\n\t" +#else + "strd r8, r9, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #120]\n\t" + "str r11, [sp, #128]\n\t" +#else + "strd r10, r11, [sp, #120]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #160]\n\t" + "ldr r11, [sp, #168]\n\t" +#else + "ldrd r10, r11, [sp, #160]\n\t" +#endif + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #160]\n\t" + "str r11, [sp, #168]\n\t" +#else + "strd r10, r11, [sp, #160]\n\t" +#endif + "str r12, [%[state]]\n\t" + "str lr, [%[state], #4]\n\t" + /* Calc b[3] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #24]\n\t" + "ldr r5, [sp, #32]\n\t" +#else + "ldrd r4, r5, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #64]\n\t" + "ldr r7, [sp, #72]\n\t" +#else + "ldrd r6, r7, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #104]\n\t" + "ldr r9, [sp, #112]\n\t" +#else + "ldrd r8, r9, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #144]\n\t" + "ldr r11, [sp, #152]\n\t" +#else + "ldrd r10, r11, [sp, #144]\n\t" +#endif + "ldr r12, [sp, #184]\n\t" + "ldr lr, [sp, #188]\n\t" + "eor r4, r4, r6\n\t" + "eor r5, r5, r7\n\t" + "eor r4, r4, r8\n\t" + "eor r5, r5, r9\n\t" + "eor r4, r4, r10\n\t" + "eor r5, r5, r11\n\t" + "eor r4, r4, r12\n\t" + "eor r5, r5, lr\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [%[state], #24]\n\t" + "str r5, [%[state], #32]\n\t" +#else + "strd r4, r5, 
[%[state], #24]\n\t" +#endif + /* Calc t[2] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state], #8]\n\t" + "ldr r3, [%[state], #16]\n\t" +#else + "ldrd r2, r3, [%[state], #8]\n\t" +#endif + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* Calc b[2] and XOR t[2] into s[x*5+2] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #16]\n\t" + "ldr r5, [sp, #24]\n\t" +#else + "ldrd r4, r5, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #56]\n\t" + "ldr r7, [sp, #64]\n\t" +#else + "ldrd r6, r7, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #96]\n\t" + "ldr r9, [sp, #104]\n\t" +#else + "ldrd r8, r9, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #136]\n\t" + "ldr r11, [sp, #144]\n\t" +#else + "ldrd r10, r11, [sp, #136]\n\t" +#endif + "eor r12, r4, r6\n\t" + "eor lr, r5, r7\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #16]\n\t" + "str r5, [sp, #24]\n\t" +#else + "strd r4, r5, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #56]\n\t" + "str r7, [sp, #64]\n\t" +#else + "strd r6, r7, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #96]\n\t" + "str r9, [sp, #104]\n\t" +#else + "strd r8, r9, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #136]\n\t" + "str r11, [sp, #144]\n\t" +#else + "strd r10, r11, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #176]\n\t" + "ldr r11, [sp, #184]\n\t" +#else + "ldrd r10, r11, [sp, #176]\n\t" +#endif + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #176]\n\t" + "str r11, [sp, #184]\n\t" +#else + "strd r10, r11, [sp, #176]\n\t" +#endif + "str r12, [%[state], #16]\n\t" + "str lr, [%[state], #20]\n\t" + /* Calc t[1] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state]]\n\t" + "ldr r3, [%[state], #8]\n\t" +#else + "ldrd r2, r3, [%[state]]\n\t" +#endif + "eor r2, r2, lr, lsr #31\n\t" + "eor r3, r3, r12, lsr #31\n\t" + "eor r2, r2, r12, lsl #1\n\t" + "eor r3, r3, lr, lsl #1\n\t" + /* XOR t[1] into s[x*5+1] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #8]\n\t" + "ldr r5, [sp, #16]\n\t" +#else + "ldrd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #48]\n\t" + "ldr r7, [sp, #56]\n\t" +#else + "ldrd r6, r7, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #88]\n\t" + "ldr r9, [sp, #96]\n\t" +#else + "ldrd r8, r9, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #128]\n\t" + "ldr r11, [sp, #136]\n\t" +#else + "ldrd r10, r11, [sp, #128]\n\t" +#endif + "ldr r12, [sp, #168]\n\t" + "ldr lr, [sp, #172]\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" 
+ "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #8]\n\t" + "str r5, [sp, #16]\n\t" +#else + "strd r4, r5, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #48]\n\t" + "str r7, [sp, #56]\n\t" +#else + "strd r6, r7, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #88]\n\t" + "str r9, [sp, #96]\n\t" +#else + "strd r8, r9, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #128]\n\t" + "str r11, [sp, #136]\n\t" +#else + "strd r10, r11, [sp, #128]\n\t" +#endif + "str r12, [sp, #168]\n\t" + "str lr, [sp, #172]\n\t" + /* Calc t[3] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state], #16]\n\t" + "ldr r3, [%[state], #24]\n\t" +#else + "ldrd r2, r3, [%[state], #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state], #32]\n\t" + "ldr r5, [%[state], #40]\n\t" +#else + "ldrd r4, r5, [%[state], #32]\n\t" +#endif + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* XOR t[3] into s[x*5+3] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #24]\n\t" + "ldr r5, [sp, #32]\n\t" +#else + "ldrd r4, r5, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #64]\n\t" + "ldr r7, [sp, #72]\n\t" +#else + "ldrd r6, r7, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #104]\n\t" + "ldr r9, [sp, #112]\n\t" +#else + "ldrd r8, r9, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #144]\n\t" + "ldr r11, [sp, #152]\n\t" +#else + "ldrd r10, r11, [sp, #144]\n\t" +#endif + "ldr r12, [sp, #184]\n\t" + "ldr lr, [sp, #188]\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #24]\n\t" + "str r5, [sp, #32]\n\t" +#else + "strd r4, r5, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #64]\n\t" + "str r7, [sp, #72]\n\t" +#else + "strd r6, r7, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #104]\n\t" + "str r9, [sp, #112]\n\t" +#else + "strd r8, r9, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #144]\n\t" + "str r11, [sp, #152]\n\t" +#else + "strd r10, r11, [sp, #144]\n\t" +#endif + "str r12, [sp, #184]\n\t" + "str lr, [sp, #188]\n\t" + /* Calc t[4] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [%[state], #24]\n\t" + "ldr r3, [%[state], #32]\n\t" +#else + "ldrd r2, r3, [%[state], #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [%[state]]\n\t" + "ldr r5, [%[state], #8]\n\t" +#else + "ldrd r4, r5, [%[state]]\n\t" +#endif + "eor r2, r2, r5, lsr #31\n\t" + "eor r3, r3, r4, lsr #31\n\t" + "eor r2, r2, r4, lsl #1\n\t" + "eor r3, r3, r5, lsl #1\n\t" + /* XOR t[4] into s[x*5+4] */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, 
[sp, #32]\n\t" + "ldr r5, [sp, #40]\n\t" +#else + "ldrd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #72]\n\t" + "ldr r7, [sp, #80]\n\t" +#else + "ldrd r6, r7, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #112]\n\t" + "ldr r9, [sp, #120]\n\t" +#else + "ldrd r8, r9, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #152]\n\t" + "ldr r11, [sp, #160]\n\t" +#else + "ldrd r10, r11, [sp, #152]\n\t" +#endif + "ldr r12, [sp, #192]\n\t" + "ldr lr, [sp, #196]\n\t" + "eor r4, r4, r2\n\t" + "eor r5, r5, r3\n\t" + "eor r6, r6, r2\n\t" + "eor r7, r7, r3\n\t" + "eor r8, r8, r2\n\t" + "eor r9, r9, r3\n\t" + "eor r10, r10, r2\n\t" + "eor r11, r11, r3\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r4, [sp, #32]\n\t" + "str r5, [sp, #40]\n\t" +#else + "strd r4, r5, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r6, [sp, #72]\n\t" + "str r7, [sp, #80]\n\t" +#else + "strd r6, r7, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r8, [sp, #112]\n\t" + "str r9, [sp, #120]\n\t" +#else + "strd r8, r9, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "str r10, [sp, #152]\n\t" + "str r11, [sp, #160]\n\t" +#else + "strd r10, r11, [sp, #152]\n\t" +#endif + "str r12, [sp, #192]\n\t" + "str lr, [sp, #196]\n\t" + /* Row Mix */ + /* Row 0 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp]\n\t" + "ldr r3, [sp, #8]\n\t" +#else + "ldrd r2, r3, [sp]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #48]\n\t" + "ldr r5, [sp, #56]\n\t" +#else + "ldrd r4, r5, [sp, #48]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #96]\n\t" + "ldr r7, [sp, #104]\n\t" +#else + "ldrd r6, r7, [sp, #96]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #144]\n\t" + "ldr r9, [sp, #152]\n\t" +#else + "ldrd r8, r9, [sp, #144]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #192]\n\t" + "ldr r11, [sp, #200]\n\t" +#else + "ldrd r10, r11, [sp, #192]\n\t" +#endif + /* s[1] <<< 44 */ + "mov lr, r4\n\t" + "lsr r12, r5, #20\n\t" + "lsr r4, r4, #20\n\t" + "orr r4, r4, r5, lsl #12\n\t" + "orr r5, r12, lr, lsl #12\n\t" + /* s[2] <<< 43 */ + "mov lr, r6\n\t" + "lsr r12, r7, #21\n\t" + "lsr r6, r6, #21\n\t" + "orr r6, r6, r7, lsl #11\n\t" + "orr r7, r12, lr, lsl #11\n\t" + /* s[3] <<< 21 */ + "lsr r12, r9, #11\n\t" + "lsr lr, r8, #11\n\t" + "orr r8, r12, r8, lsl #21\n\t" + "orr r9, lr, r9, lsl #21\n\t" + /* s[4] <<< 14 */ + "lsr r12, r11, #18\n\t" + "lsr lr, r10, #18\n\t" + "orr r10, r12, r10, lsl #14\n\t" + "orr r11, lr, r11, lsl #14\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [%[state], #8]\n\t" + "str lr, [%[state], #12]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [%[state], #16]\n\t" + "str lr, [%[state], #20]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [%[state], #24]\n\t" + "str lr, [%[state], #28]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [%[state], #32]\n\t" + "str lr, 
[%[state], #36]\n\t" + /* Get constant */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [r1]\n\t" + "ldr r11, [r1, #4]\n\t" +#else + "ldrd r10, r11, [r1]\n\t" +#endif + "add r1, r1, #8\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + /* XOR in constant */ + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [%[state]]\n\t" + "str lr, [%[state], #4]\n\t" + /* Row 1 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp, #24]\n\t" + "ldr r3, [sp, #32]\n\t" +#else + "ldrd r2, r3, [sp, #24]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #72]\n\t" + "ldr r5, [sp, #80]\n\t" +#else + "ldrd r4, r5, [sp, #72]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #80]\n\t" + "ldr r7, [sp, #88]\n\t" +#else + "ldrd r6, r7, [sp, #80]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #128]\n\t" + "ldr r9, [sp, #136]\n\t" +#else + "ldrd r8, r9, [sp, #128]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #176]\n\t" + "ldr r11, [sp, #184]\n\t" +#else + "ldrd r10, r11, [sp, #176]\n\t" +#endif + /* s[0] <<< 28 */ + "lsr r12, r3, #4\n\t" + "lsr lr, r2, #4\n\t" + "orr r2, r12, r2, lsl #28\n\t" + "orr r3, lr, r3, lsl #28\n\t" + /* s[1] <<< 20 */ + "lsr r12, r5, #12\n\t" + "lsr lr, r4, #12\n\t" + "orr r4, r12, r4, lsl #20\n\t" + "orr r5, lr, r5, lsl #20\n\t" + /* s[2] <<< 3 */ + "lsr r12, r7, #29\n\t" + "lsr lr, r6, #29\n\t" + "orr r6, r12, r6, lsl #3\n\t" + "orr r7, lr, r7, lsl #3\n\t" + /* s[3] <<< 45 */ + "mov lr, r8\n\t" + "lsr r12, r9, #19\n\t" + "lsr r8, r8, #19\n\t" + "orr r8, r8, r9, lsl #13\n\t" + "orr r9, r12, lr, lsl #13\n\t" + /* s[4] <<< 61 */ + "mov lr, r10\n\t" + "lsr r12, r11, #3\n\t" + "lsr r10, r10, #3\n\t" + "orr r10, r10, r11, lsl #29\n\t" + "orr r11, r12, lr, lsl #29\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [%[state], #48]\n\t" + "str lr, [%[state], #52]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [%[state], #56]\n\t" + "str lr, [%[state], #60]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [%[state], #64]\n\t" + "str lr, [%[state], #68]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [%[state], #72]\n\t" + "str lr, [%[state], #76]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [%[state], #40]\n\t" + "str lr, [%[state], #44]\n\t" + /* Row 2 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp, #8]\n\t" + "ldr r3, [sp, #16]\n\t" +#else + "ldrd r2, r3, [sp, #8]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #56]\n\t" + "ldr r5, [sp, #64]\n\t" +#else + "ldrd r4, r5, [sp, #56]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #104]\n\t" + "ldr r7, [sp, #112]\n\t" +#else + "ldrd r6, r7, [sp, #104]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #152]\n\t" + "ldr r9, [sp, #160]\n\t" +#else + "ldrd r8, r9, [sp, #152]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #160]\n\t" + "ldr r11, [sp, #168]\n\t" +#else + "ldrd r10, r11, [sp, #160]\n\t" +#endif 
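+ /* Editor's annotation, not part of the generated source: the rotate blocks below implement the Keccak rho step on 32-bit register pairs, the even register holding the low word and the odd register the high word of each 64-bit lane. For a rotate-left by n < 32: lo' = (lo << n) | (hi >> (32 - n)) and hi' = (hi << n) | (lo >> (32 - n)); for n > 32 the generator emits the equivalent rotate-right by 64 - n, with a mov saving the low word because it feeds both halves of the result. The bic/eor pairs that follow are the chi step, s'[x] = s[x] ^ (~s[x+1] & s[x+2]), computed one 32-bit half at a time as eor(s[x], bic(s[x+2], s[x+1])). */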
+ /* s[0] <<< 1 */ + "lsr r12, r3, #31\n\t" + "lsr lr, r2, #31\n\t" + "orr r2, r12, r2, lsl #1\n\t" + "orr r3, lr, r3, lsl #1\n\t" + /* s[1] <<< 6 */ + "lsr r12, r5, #26\n\t" + "lsr lr, r4, #26\n\t" + "orr r4, r12, r4, lsl #6\n\t" + "orr r5, lr, r5, lsl #6\n\t" + /* s[2] <<< 25 */ + "lsr r12, r7, #7\n\t" + "lsr lr, r6, #7\n\t" + "orr r6, r12, r6, lsl #25\n\t" + "orr r7, lr, r7, lsl #25\n\t" + /* s[3] <<< 8 */ + "lsr r12, r9, #24\n\t" + "lsr lr, r8, #24\n\t" + "orr r8, r12, r8, lsl #8\n\t" + "orr r9, lr, r9, lsl #8\n\t" + /* s[4] <<< 18 */ + "lsr r12, r11, #14\n\t" + "lsr lr, r10, #14\n\t" + "orr r10, r12, r10, lsl #18\n\t" + "orr r11, lr, r11, lsl #18\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [%[state], #88]\n\t" + "str lr, [%[state], #92]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [%[state], #96]\n\t" + "str lr, [%[state], #100]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [%[state], #104]\n\t" + "str lr, [%[state], #108]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [%[state], #112]\n\t" + "str lr, [%[state], #116]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [%[state], #80]\n\t" + "str lr, [%[state], #84]\n\t" + /* Row 3 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp, #32]\n\t" + "ldr r3, [sp, #36]\n\t" +#else + "ldrd r2, r3, [sp, #32]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #40]\n\t" + "ldr r5, [sp, #44]\n\t" +#else + "ldrd r4, r5, [sp, #40]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #88]\n\t" + "ldr r7, [sp, #92]\n\t" +#else + "ldrd r6, r7, [sp, #88]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #136]\n\t" + "ldr r9, [sp, #140]\n\t" +#else + "ldrd r8, r9, [sp, #136]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #184]\n\t" + "ldr r11, [sp, #188]\n\t" +#else + "ldrd r10, r11, [sp, #184]\n\t" +#endif + /* s[0] <<< 27 */ + "lsr r12, r3, #5\n\t" + "lsr lr, r2, #5\n\t" + "orr r2, r12, r2, lsl #27\n\t" + "orr r3, lr, r3, lsl #27\n\t" + /* s[1] <<< 36 */ + "mov lr, r4\n\t" + "lsr r12, r5, #28\n\t" + "lsr r4, r4, #28\n\t" + "orr r4, r4, r5, lsl #4\n\t" + "orr r5, r12, lr, lsl #4\n\t" + /* s[2] <<< 10 */ + "lsr r12, r7, #22\n\t" + "lsr lr, r6, #22\n\t" + "orr r6, r12, r6, lsl #10\n\t" + "orr r7, lr, r7, lsl #10\n\t" + /* s[3] <<< 15 */ + "lsr r12, r9, #17\n\t" + "lsr lr, r8, #17\n\t" + "orr r8, r12, r8, lsl #15\n\t" + "orr r9, lr, r9, lsl #15\n\t" + /* s[4] <<< 56 */ + "mov lr, r10\n\t" + "lsr r12, r11, #8\n\t" + "lsr r10, r10, #8\n\t" + "orr r10, r10, r11, lsl #24\n\t" + "orr r11, r12, lr, lsl #24\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [%[state], #128]\n\t" + "str lr, [%[state], #132]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [%[state], #136]\n\t" + "str lr, [%[state], #140]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [%[state], #144]\n\t" + "str lr, [%[state], #148]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor
lr, lr, r11\n\t" + "str r12, [%[state], #152]\n\t" + "str lr, [%[state], #156]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [%[state], #120]\n\t" + "str lr, [%[state], #124]\n\t" + /* Row 4 */ +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r2, [sp, #16]\n\t" + "ldr r3, [sp, #24]\n\t" +#else + "ldrd r2, r3, [sp, #16]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r4, [sp, #64]\n\t" + "ldr r5, [sp, #72]\n\t" +#else + "ldrd r4, r5, [sp, #64]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r6, [sp, #112]\n\t" + "ldr r7, [sp, #120]\n\t" +#else + "ldrd r6, r7, [sp, #112]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r8, [sp, #120]\n\t" + "ldr r9, [sp, #128]\n\t" +#else + "ldrd r8, r9, [sp, #120]\n\t" +#endif +#if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) + "ldr r10, [sp, #168]\n\t" + "ldr r11, [sp, #176]\n\t" +#else + "ldrd r10, r11, [sp, #168]\n\t" +#endif + /* s[0] <<< 62 */ + "mov lr, r2\n\t" + "lsr r12, r3, #2\n\t" + "lsr r2, r2, #2\n\t" + "orr r2, r2, r3, lsl #30\n\t" + "orr r3, r12, lr, lsl #30\n\t" + /* s[1] <<< 55 */ + "mov lr, r4\n\t" + "lsr r12, r5, #9\n\t" + "lsr r4, r4, #9\n\t" + "orr r4, r4, r5, lsl #23\n\t" + "orr r5, r12, lr, lsl #23\n\t" + /* s[2] <<< 39 */ + "mov lr, r6\n\t" + "lsr r12, r7, #25\n\t" + "lsr r6, r6, #25\n\t" + "orr r6, r6, r7, lsl #7\n\t" + "orr r7, r12, lr, lsl #7\n\t" + /* s[3] <<< 41 */ + "mov lr, r8\n\t" + "lsr r12, r9, #23\n\t" + "lsr r8, r8, #23\n\t" + "orr r8, r8, r9, lsl #9\n\t" + "orr r9, r12, lr, lsl #9\n\t" + /* s[4] <<< 2 */ + "lsr r12, r11, #30\n\t" + "lsr lr, r10, #30\n\t" + "orr r10, r12, r10, lsl #2\n\t" + "orr r11, lr, r11, lsl #2\n\t" + "bic r12, r8, r6\n\t" + "bic lr, r9, r7\n\t" + "eor r12, r12, r4\n\t" + "eor lr, lr, r5\n\t" + "str r12, [%[state], #168]\n\t" + "str lr, [%[state], #172]\n\t" + "bic r12, r10, r8\n\t" + "bic lr, r11, r9\n\t" + "eor r12, r12, r6\n\t" + "eor lr, lr, r7\n\t" + "str r12, [%[state], #176]\n\t" + "str lr, [%[state], #180]\n\t" + "bic r12, r2, r10\n\t" + "bic lr, r3, r11\n\t" + "eor r12, r12, r8\n\t" + "eor lr, lr, r9\n\t" + "str r12, [%[state], #184]\n\t" + "str lr, [%[state], #188]\n\t" + "bic r12, r4, r2\n\t" + "bic lr, r5, r3\n\t" + "eor r12, r12, r10\n\t" + "eor lr, lr, r11\n\t" + "str r12, [%[state], #192]\n\t" + "str lr, [%[state], #196]\n\t" + "bic r12, r6, r4\n\t" + "bic lr, r7, r5\n\t" + "eor r12, r12, r2\n\t" + "eor lr, lr, r3\n\t" + "str r12, [%[state], #160]\n\t" + "str lr, [%[state], #164]\n\t" + "ldr r2, [sp, #200]\n\t" + "subs r2, r2, #1\n\t" + "bne L_sha3_arm32_begin_%=\n\t" + "add sp, sp, #0xcc\n\t" + : [state] "+r" (state), [L_sha3_arm2_neon_rt] "+r" (L_sha3_arm2_neon_rt_c), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c) + : + : "memory", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc" + ); +} + +#endif /* WOLFSSL_ARMASM_NO_NEON */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */ +#endif /* WOLFSSL_ARMASM */ + +#endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S index f06ae9284d..48e7ec4264 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm.S @@ -30,7 +30,7 @@ #include #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if 
!defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifndef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_SHA512 #ifdef WOLFSSL_ARMASM_NO_NEON @@ -9366,7 +9366,7 @@ L_SHA512_transform_neon_len_start: .size Transform_Sha512_Len,.-Transform_Sha512_Len #endif /* !WOLFSSL_ARMASM_NO_NEON */ #endif /* WOLFSSL_SHA512 */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ #if defined(__linux__) && defined(__ELF__) diff --git a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c index 32c54d102b..cf29ab59ff 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha512-asm_c.c @@ -31,7 +31,7 @@ #include <wolfssl/wolfcrypt/settings.h> #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #include <stdint.h> #ifdef HAVE_CONFIG_H #include <config.h> @@ -41,7 +41,7 @@ #ifdef WOLFSSL_ARMASM_INLINE #ifdef WOLFSSL_ARMASM -#if !defined(__aarch64__) && defined(__arm__) +#if !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) #ifdef __IAR_SYSTEMS_ICC__ #define __asm__ asm @@ -9159,7 +9159,7 @@ void Transform_Sha512_Len(wc_Sha512* sha512_p, const byte* data_p, word32 len_p) #endif /* !WOLFSSL_ARMASM_NO_NEON */ #endif /* WOLFSSL_SHA512 */ -#endif /* !__aarch64__ && !__thumb__ */ +#endif /* !__aarch64__ && __arm__ && !__thumb__ */ #endif /* WOLFSSL_ARMASM */ -#endif /* !defined(__aarch64__) && defined(__arm__) */ +#endif /* !defined(__aarch64__) && defined(__arm__) && !defined(__thumb__) */ #endif /* WOLFSSL_ARMASM */ #endif /* WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm.S b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S new file mode 100644 index 0000000000..bc3ebecf26 --- /dev/null +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm.S @@ -0,0 +1,1174 @@ +/* thumb2-sha3-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha3/sha3.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha3-asm.S + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__thumb__) +#ifndef WOLFSSL_ARMASM_INLINE + .thumb + .syntax unified + .text + .type L_sha3_thumb2_rt, %object + .size L_sha3_thumb2_rt, 192 + .align 8 +L_sha3_thumb2_rt: + .word 0x1 + .word 0x0 + .word 0x8082 + .word 0x0 + .word 0x808a + .word 0x80000000 + .word 0x80008000 + .word 0x80000000 + .word 0x808b + .word 0x0 + .word 0x80000001 + .word 0x0 + .word 0x80008081 + .word 0x80000000 + .word 0x8009 + .word 0x80000000 + .word 0x8a + .word 0x0 + .word 0x88 + .word 0x0 + .word 0x80008009 + .word 0x0 + .word 0x8000000a + .word 0x0 + .word 0x8000808b + .word 0x0 + .word 0x8b + .word 0x80000000 + .word 0x8089 + .word 0x80000000 + .word 0x8003 + .word 0x80000000 + .word 0x8002 + .word 0x80000000 + .word 0x80 + .word 0x80000000 + .word 0x800a + .word 0x0 + .word 0x8000000a + .word 0x80000000 + .word 0x80008081 + .word 0x80000000 + .word 0x8080 + .word 0x80000000 + .word 0x80000001 + .word 0x0 + .word 0x80008008 + .word 0x80000000 + .text + .align 4 + .globl BlockSha3 + .type BlockSha3, %function +BlockSha3: + PUSH {r4, r5, r6, r7, r8, r9, r10, r11, lr} + SUB sp, sp, #0xcc + ADR r1, L_sha3_thumb2_rt + MOV r2, #0xc +L_sha3_thumb2_begin: + STR r2, [sp, #200] + /* Round even */ + /* Calc b[4] */ + LDRD r4, r5, [r0, #32] + LDRD r6, r7, [r0, #72] + LDRD r8, r9, [r0, #112] + LDRD r10, r11, [r0, #152] + LDR r12, [r0, #192] + LDR lr, [r0, #196] + EOR r2, r4, r6 + EOR r3, r5, r7 + EOR r2, r2, r8 + EOR r3, r3, r9 + EOR r2, r2, r10 + EOR r3, r3, r11 + EOR r2, r2, r12 + EOR r3, r3, lr + STRD r2, r3, [sp, #32] + /* Calc b[1] */ + LDRD r4, r5, [r0, #8] + LDRD r6, r7, [r0, #48] + LDRD r8, r9, [r0, #88] + LDRD r10, r11, [r0, #128] + LDR r12, [r0, #168] + LDR lr, [r0, #172] + EOR r4, r4, r6 + EOR r5, r5, r7 + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r4, r4, r10 + EOR r5, r5, r11 + EOR r4, r4, r12 + EOR r5, r5, lr + STRD r4, r5, [sp, #8] + /* Calc t[0] */ + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* Calc b[0] and XOR t[0] into s[x*5+0] */ + LDRD r4, r5, [r0] + LDRD r6, r7, [r0, #40] + LDRD r8, r9, [r0, #80] + LDRD r10, r11, [r0, #120] + EOR r12, r4, r6 + EOR lr, r5, r7 + EOR r12, r12, r8 + EOR lr, lr, r9 + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r4, r5, [r0] + STRD r6, r7, [r0, #40] + STRD r8, r9, [r0, #80] + STRD r10, r11, [r0, #120] + LDRD r10, r11, [r0, #160] + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r10, r11, [r0, #160] + STR r12, [sp] + STR lr, [sp, #4] + /* Calc b[3] */ + LDRD r4, r5, [r0, #24] + LDRD r6, r7, [r0, #64] + LDRD r8, r9, [r0, #104] + LDRD r10, r11, [r0, #144] + LDR r12, [r0, #184] + LDR lr, [r0, #188] + EOR r4, r4, r6 + EOR r5, r5, r7 + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r4, r4, r10 + EOR r5, r5, r11 + EOR r4, r4, r12 + EOR r5, r5, lr + STRD r4, r5, [sp, #24] + /* Calc t[2] */ + LDRD r2, r3, [sp, #8] + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR
r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* Calc b[2] and XOR t[2] into s[x*5+2] */ + LDRD r4, r5, [r0, #16] + LDRD r6, r7, [r0, #56] + LDRD r8, r9, [r0, #96] + LDRD r10, r11, [r0, #136] + EOR r12, r4, r6 + EOR lr, r5, r7 + EOR r12, r12, r8 + EOR lr, lr, r9 + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r4, r5, [r0, #16] + STRD r6, r7, [r0, #56] + STRD r8, r9, [r0, #96] + STRD r10, r11, [r0, #136] + LDRD r10, r11, [r0, #176] + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r10, r11, [r0, #176] + STR r12, [sp, #16] + STR lr, [sp, #20] + /* Calc t[1] */ + LDRD r2, r3, [sp] + EOR r2, r2, lr, LSR #31 + EOR r3, r3, r12, LSR #31 + EOR r2, r2, r12, LSL #1 + EOR r3, r3, lr, LSL #1 + /* XOR t[1] into s[x*5+1] */ + LDRD r4, r5, [r0, #8] + LDRD r6, r7, [r0, #48] + LDRD r8, r9, [r0, #88] + LDRD r10, r11, [r0, #128] + LDR r12, [r0, #168] + LDR lr, [r0, #172] + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + EOR r12, r12, r2 + EOR lr, lr, r3 + STRD r4, r5, [r0, #8] + STRD r6, r7, [r0, #48] + STRD r8, r9, [r0, #88] + STRD r10, r11, [r0, #128] + STR r12, [r0, #168] + STR lr, [r0, #172] + /* Calc t[3] */ + LDRD r2, r3, [sp, #16] + LDRD r4, r5, [sp, #32] + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* XOR t[3] into s[x*5+3] */ + LDRD r4, r5, [r0, #24] + LDRD r6, r7, [r0, #64] + LDRD r8, r9, [r0, #104] + LDRD r10, r11, [r0, #144] + LDR r12, [r0, #184] + LDR lr, [r0, #188] + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + EOR r12, r12, r2 + EOR lr, lr, r3 + STRD r4, r5, [r0, #24] + STRD r6, r7, [r0, #64] + STRD r8, r9, [r0, #104] + STRD r10, r11, [r0, #144] + STR r12, [r0, #184] + STR lr, [r0, #188] + /* Calc t[4] */ + LDRD r2, r3, [sp, #24] + LDRD r4, r5, [sp] + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* XOR t[4] into s[x*5+4] */ + LDRD r4, r5, [r0, #32] + LDRD r6, r7, [r0, #72] + LDRD r8, r9, [r0, #112] + LDRD r10, r11, [r0, #152] + LDR r12, [r0, #192] + LDR lr, [r0, #196] + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + EOR r12, r12, r2 + EOR lr, lr, r3 + STRD r4, r5, [r0, #32] + STRD r6, r7, [r0, #72] + STRD r8, r9, [r0, #112] + STRD r10, r11, [r0, #152] + STR r12, [r0, #192] + STR lr, [r0, #196] + /* Row Mix */ + /* Row 0 */ + LDRD r2, r3, [r0] + LDRD r4, r5, [r0, #48] + LDRD r6, r7, [r0, #96] + LDRD r8, r9, [r0, #144] + LDRD r10, r11, [r0, #192] + /* s[1] <<< 44 */ + MOV lr, r4 + LSR r12, r5, #20 + LSR r4, r4, #20 + ORR r4, r4, r5, LSL #12 + ORR r5, r12, lr, LSL #12 + /* s[2] <<< 43 */ + MOV lr, r6 + LSR r12, r7, #21 + LSR r6, r6, #21 + ORR r6, r6, r7, LSL #11 + ORR r7, r12, lr, LSL #11 + /* s[3] <<< 21 */ + LSR r12, r9, #11 + LSR lr, r8, #11 + ORR r8, r12, r8, LSL #21 + ORR r9, lr, r9, LSL #21 + /* s[4] <<< 14 */ + LSR r12, r11, #18 + LSR lr, r10, #18 + ORR r10, r12, r10, LSL #14 + ORR r11, lr, r11, LSL #14 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [sp, #8] + STR lr, [sp, #12] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [sp, #16] + STR lr, [sp, #20] + BIC r12, 
r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [sp, #24] + STR lr, [sp, #28] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [sp, #32] + STR lr, [sp, #36] + /* Get constant */ + LDRD r10, r11, [r1] + ADD r1, r1, #0x8 + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + /* XOR in constant */ + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [sp] + STR lr, [sp, #4] + /* Row 1 */ + LDRD r2, r3, [r0, #24] + LDRD r4, r5, [r0, #72] + LDRD r6, r7, [r0, #80] + LDRD r8, r9, [r0, #128] + LDRD r10, r11, [r0, #176] + /* s[0] <<< 28 */ + LSR r12, r3, #4 + LSR lr, r2, #4 + ORR r2, r12, r2, LSL #28 + ORR r3, lr, r3, LSL #28 + /* s[1] <<< 20 */ + LSR r12, r5, #12 + LSR lr, r4, #12 + ORR r4, r12, r4, LSL #20 + ORR r5, lr, r5, LSL #20 + /* s[2] <<< 3 */ + LSR r12, r7, #29 + LSR lr, r6, #29 + ORR r6, r12, r6, LSL #3 + ORR r7, lr, r7, LSL #3 + /* s[3] <<< 45 */ + MOV lr, r8 + LSR r12, r9, #19 + LSR r8, r8, #19 + ORR r8, r8, r9, LSL #13 + ORR r9, r12, lr, LSL #13 + /* s[4] <<< 61 */ + MOV lr, r10 + LSR r12, r11, #3 + LSR r10, r10, #3 + ORR r10, r10, r11, LSL #29 + ORR r11, r12, lr, LSL #29 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [sp, #48] + STR lr, [sp, #52] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [sp, #56] + STR lr, [sp, #60] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [sp, #64] + STR lr, [sp, #68] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [sp, #72] + STR lr, [sp, #76] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [sp, #40] + STR lr, [sp, #44] + /* Row 2 */ + LDRD r2, r3, [r0, #8] + LDRD r4, r5, [r0, #56] + LDRD r6, r7, [r0, #104] + LDRD r8, r9, [r0, #152] + LDRD r10, r11, [r0, #160] + /* s[0] <<< 1 */ + LSR r12, r3, #31 + LSR lr, r2, #31 + ORR r2, r12, r2, LSL #1 + ORR r3, lr, r3, LSL #1 + /* s[1] <<< 6 */ + LSR r12, r5, #26 + LSR lr, r4, #26 + ORR r4, r12, r4, LSL #6 + ORR r5, lr, r5, LSL #6 + /* s[2] <<< 25 */ + LSR r12, r7, #7 + LSR lr, r6, #7 + ORR r6, r12, r6, LSL #25 + ORR r7, lr, r7, LSL #25 + /* s[3] <<< 8 */ + LSR r12, r9, #24 + LSR lr, r8, #24 + ORR r8, r12, r8, LSL #8 + ORR r9, lr, r9, LSL #8 + /* s[4] <<< 18 */ + LSR r12, r11, #14 + LSR lr, r10, #14 + ORR r10, r12, r10, LSL #18 + ORR r11, lr, r11, LSL #18 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [sp, #88] + STR lr, [sp, #92] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [sp, #96] + STR lr, [sp, #100] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [sp, #104] + STR lr, [sp, #108] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [sp, #112] + STR lr, [sp, #116] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [sp, #80] + STR lr, [sp, #84] + /* Row 3 */ + LDRD r2, r3, [r0, #32] + LDRD r4, r5, [r0, #40] + LDRD r6, r7, [r0, #88] + LDRD r8, r9, [r0, #136] + LDRD r10, r11, [r0, #184] + /* s[0] <<< 27 */ + LSR r12, r3, #5 + LSR lr, r2, #5 + ORR r2, r12, r2, LSL #27 + ORR r3, lr, r3, LSL #27 + /* s[1] <<< 36 */ + MOV lr, r4 + LSR r12, r5, #28 + LSR r4, r4, #28 + ORR r4, r4, r5, LSL #4 + ORR r5, r12, lr, LSL #4 + /* s[2] <<< 10 */ + LSR r12, r7, #22 + LSR lr, r6, #22 + ORR r6, r12, r6, LSL #10 + ORR r7, lr, r7, LSL #10 + /* s[3] <<< 15 */ + LSR r12, r9, #17 + LSR lr, r8, #17 + ORR r8, r12, r8, LSL 
#15 + ORR r9, lr, r9, LSL #15 + /* s[4] <<< 56 */ + MOV lr, r10 + LSR r12, r11, #8 + LSR r10, r10, #8 + ORR r10, r10, r11, LSL #24 + ORR r11, r12, lr, LSL #24 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [sp, #128] + STR lr, [sp, #132] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [sp, #136] + STR lr, [sp, #140] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [sp, #144] + STR lr, [sp, #148] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [sp, #152] + STR lr, [sp, #156] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [sp, #120] + STR lr, [sp, #124] + /* Row 4 */ + LDRD r2, r3, [r0, #16] + LDRD r4, r5, [r0, #64] + LDRD r6, r7, [r0, #112] + LDRD r8, r9, [r0, #120] + LDRD r10, r11, [r0, #168] + /* s[0] <<< 62 */ + MOV lr, r2 + LSR r12, r3, #2 + LSR r2, r2, #2 + ORR r2, r2, r3, LSL #30 + ORR r3, r12, lr, LSL #30 + /* s[1] <<< 55 */ + MOV lr, r4 + LSR r12, r5, #9 + LSR r4, r4, #9 + ORR r4, r4, r5, LSL #23 + ORR r5, r12, lr, LSL #23 + /* s[2] <<< 39 */ + MOV lr, r6 + LSR r12, r7, #25 + LSR r6, r6, #25 + ORR r6, r6, r7, LSL #7 + ORR r7, r12, lr, LSL #7 + /* s[3] <<< 41 */ + MOV lr, r8 + LSR r12, r9, #23 + LSR r8, r8, #23 + ORR r8, r8, r9, LSL #9 + ORR r9, r12, lr, LSL #9 + /* s[4] <<< 2 */ + LSR r12, r11, #30 + LSR lr, r10, #30 + ORR r10, r12, r10, LSL #2 + ORR r11, lr, r11, LSL #2 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [sp, #168] + STR lr, [sp, #172] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [sp, #176] + STR lr, [sp, #180] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [sp, #184] + STR lr, [sp, #188] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [sp, #192] + STR lr, [sp, #196] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [sp, #160] + STR lr, [sp, #164] + /* Round odd */ + /* Calc b[4] */ + LDRD r4, r5, [sp, #32] + LDRD r6, r7, [sp, #72] + LDRD r8, r9, [sp, #112] + LDRD r10, r11, [sp, #152] + LDR r12, [sp, #192] + LDR lr, [sp, #196] + EOR r2, r4, r6 + EOR r3, r5, r7 + EOR r2, r2, r8 + EOR r3, r3, r9 + EOR r2, r2, r10 + EOR r3, r3, r11 + EOR r2, r2, r12 + EOR r3, r3, lr + STRD r2, r3, [r0, #32] + /* Calc b[1] */ + LDRD r4, r5, [sp, #8] + LDRD r6, r7, [sp, #48] + LDRD r8, r9, [sp, #88] + LDRD r10, r11, [sp, #128] + LDR r12, [sp, #168] + LDR lr, [sp, #172] + EOR r4, r4, r6 + EOR r5, r5, r7 + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r4, r4, r10 + EOR r5, r5, r11 + EOR r4, r4, r12 + EOR r5, r5, lr + STRD r4, r5, [r0, #8] + /* Calc t[0] */ + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* Calc b[0] and XOR t[0] into s[x*5+0] */ + LDRD r4, r5, [sp] + LDRD r6, r7, [sp, #40] + LDRD r8, r9, [sp, #80] + LDRD r10, r11, [sp, #120] + EOR r12, r4, r6 + EOR lr, r5, r7 + EOR r12, r12, r8 + EOR lr, lr, r9 + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r4, r5, [sp] + STRD r6, r7, [sp, #40] + STRD r8, r9, [sp, #80] + STRD r10, r11, [sp, #120] + LDRD r10, r11, [sp, #160] + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r10, r11, [sp, #160] + STR r12, [r0] + STR lr, [r0, #4] + /* Calc b[3] */ + LDRD r4, r5, [sp, #24] + LDRD 
r6, r7, [sp, #64] + LDRD r8, r9, [sp, #104] + LDRD r10, r11, [sp, #144] + LDR r12, [sp, #184] + LDR lr, [sp, #188] + EOR r4, r4, r6 + EOR r5, r5, r7 + EOR r4, r4, r8 + EOR r5, r5, r9 + EOR r4, r4, r10 + EOR r5, r5, r11 + EOR r4, r4, r12 + EOR r5, r5, lr + STRD r4, r5, [r0, #24] + /* Calc t[2] */ + LDRD r2, r3, [r0, #8] + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* Calc b[2] and XOR t[2] into s[x*5+2] */ + LDRD r4, r5, [sp, #16] + LDRD r6, r7, [sp, #56] + LDRD r8, r9, [sp, #96] + LDRD r10, r11, [sp, #136] + EOR r12, r4, r6 + EOR lr, r5, r7 + EOR r12, r12, r8 + EOR lr, lr, r9 + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r4, r5, [sp, #16] + STRD r6, r7, [sp, #56] + STRD r8, r9, [sp, #96] + STRD r10, r11, [sp, #136] + LDRD r10, r11, [sp, #176] + EOR r12, r12, r10 + EOR lr, lr, r11 + EOR r10, r10, r2 + EOR r11, r11, r3 + STRD r10, r11, [sp, #176] + STR r12, [r0, #16] + STR lr, [r0, #20] + /* Calc t[1] */ + LDRD r2, r3, [r0] + EOR r2, r2, lr, LSR #31 + EOR r3, r3, r12, LSR #31 + EOR r2, r2, r12, LSL #1 + EOR r3, r3, lr, LSL #1 + /* XOR t[1] into s[x*5+1] */ + LDRD r4, r5, [sp, #8] + LDRD r6, r7, [sp, #48] + LDRD r8, r9, [sp, #88] + LDRD r10, r11, [sp, #128] + LDR r12, [sp, #168] + LDR lr, [sp, #172] + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + EOR r12, r12, r2 + EOR lr, lr, r3 + STRD r4, r5, [sp, #8] + STRD r6, r7, [sp, #48] + STRD r8, r9, [sp, #88] + STRD r10, r11, [sp, #128] + STR r12, [sp, #168] + STR lr, [sp, #172] + /* Calc t[3] */ + LDRD r2, r3, [r0, #16] + LDRD r4, r5, [r0, #32] + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* XOR t[3] into s[x*5+3] */ + LDRD r4, r5, [sp, #24] + LDRD r6, r7, [sp, #64] + LDRD r8, r9, [sp, #104] + LDRD r10, r11, [sp, #144] + LDR r12, [sp, #184] + LDR lr, [sp, #188] + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + EOR r12, r12, r2 + EOR lr, lr, r3 + STRD r4, r5, [sp, #24] + STRD r6, r7, [sp, #64] + STRD r8, r9, [sp, #104] + STRD r10, r11, [sp, #144] + STR r12, [sp, #184] + STR lr, [sp, #188] + /* Calc t[4] */ + LDRD r2, r3, [r0, #24] + LDRD r4, r5, [r0] + EOR r2, r2, r5, LSR #31 + EOR r3, r3, r4, LSR #31 + EOR r2, r2, r4, LSL #1 + EOR r3, r3, r5, LSL #1 + /* XOR t[4] into s[x*5+4] */ + LDRD r4, r5, [sp, #32] + LDRD r6, r7, [sp, #72] + LDRD r8, r9, [sp, #112] + LDRD r10, r11, [sp, #152] + LDR r12, [sp, #192] + LDR lr, [sp, #196] + EOR r4, r4, r2 + EOR r5, r5, r3 + EOR r6, r6, r2 + EOR r7, r7, r3 + EOR r8, r8, r2 + EOR r9, r9, r3 + EOR r10, r10, r2 + EOR r11, r11, r3 + EOR r12, r12, r2 + EOR lr, lr, r3 + STRD r4, r5, [sp, #32] + STRD r6, r7, [sp, #72] + STRD r8, r9, [sp, #112] + STRD r10, r11, [sp, #152] + STR r12, [sp, #192] + STR lr, [sp, #196] + /* Row Mix */ + /* Row 0 */ + LDRD r2, r3, [sp] + LDRD r4, r5, [sp, #48] + LDRD r6, r7, [sp, #96] + LDRD r8, r9, [sp, #144] + LDRD r10, r11, [sp, #192] + /* s[1] <<< 44 */ + MOV lr, r4 + LSR r12, r5, #20 + LSR r4, r4, #20 + ORR r4, r4, r5, LSL #12 + ORR r5, r12, lr, LSL #12 + /* s[2] <<< 43 */ + MOV lr, r6 + LSR r12, r7, #21 + LSR r6, r6, #21 + ORR r6, r6, r7, LSL #11 + ORR r7, r12, lr, LSL #11 + /* s[3] <<< 21 */ + LSR r12, r9, #11 + LSR lr, r8, #11 + ORR r8, r12, r8, LSL 
#21 + ORR r9, lr, r9, LSL #21 + /* s[4] <<< 14 */ + LSR r12, r11, #18 + LSR lr, r10, #18 + ORR r10, r12, r10, LSL #14 + ORR r11, lr, r11, LSL #14 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [r0, #8] + STR lr, [r0, #12] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [r0, #16] + STR lr, [r0, #20] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [r0, #24] + STR lr, [r0, #28] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [r0, #32] + STR lr, [r0, #36] + /* Get constant */ + LDRD r10, r11, [r1] + ADD r1, r1, #0x8 + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + /* XOR in constant */ + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [r0] + STR lr, [r0, #4] + /* Row 1 */ + LDRD r2, r3, [sp, #24] + LDRD r4, r5, [sp, #72] + LDRD r6, r7, [sp, #80] + LDRD r8, r9, [sp, #128] + LDRD r10, r11, [sp, #176] + /* s[0] <<< 28 */ + LSR r12, r3, #4 + LSR lr, r2, #4 + ORR r2, r12, r2, LSL #28 + ORR r3, lr, r3, LSL #28 + /* s[1] <<< 20 */ + LSR r12, r5, #12 + LSR lr, r4, #12 + ORR r4, r12, r4, LSL #20 + ORR r5, lr, r5, LSL #20 + /* s[2] <<< 3 */ + LSR r12, r7, #29 + LSR lr, r6, #29 + ORR r6, r12, r6, LSL #3 + ORR r7, lr, r7, LSL #3 + /* s[3] <<< 45 */ + MOV lr, r8 + LSR r12, r9, #19 + LSR r8, r8, #19 + ORR r8, r8, r9, LSL #13 + ORR r9, r12, lr, LSL #13 + /* s[4] <<< 61 */ + MOV lr, r10 + LSR r12, r11, #3 + LSR r10, r10, #3 + ORR r10, r10, r11, LSL #29 + ORR r11, r12, lr, LSL #29 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [r0, #48] + STR lr, [r0, #52] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [r0, #56] + STR lr, [r0, #60] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [r0, #64] + STR lr, [r0, #68] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [r0, #72] + STR lr, [r0, #76] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [r0, #40] + STR lr, [r0, #44] + /* Row 2 */ + LDRD r2, r3, [sp, #8] + LDRD r4, r5, [sp, #56] + LDRD r6, r7, [sp, #104] + LDRD r8, r9, [sp, #152] + LDRD r10, r11, [sp, #160] + /* s[0] <<< 1 */ + LSR r12, r3, #31 + LSR lr, r2, #31 + ORR r2, r12, r2, LSL #1 + ORR r3, lr, r3, LSL #1 + /* s[1] <<< 6 */ + LSR r12, r5, #26 + LSR lr, r4, #26 + ORR r4, r12, r4, LSL #6 + ORR r5, lr, r5, LSL #6 + /* s[2] <<< 25 */ + LSR r12, r7, #7 + LSR lr, r6, #7 + ORR r6, r12, r6, LSL #25 + ORR r7, lr, r7, LSL #25 + /* s[3] <<< 8 */ + LSR r12, r9, #24 + LSR lr, r8, #24 + ORR r8, r12, r8, LSL #8 + ORR r9, lr, r9, LSL #8 + /* s[4] <<< 18 */ + LSR r12, r11, #14 + LSR lr, r10, #14 + ORR r10, r12, r10, LSL #18 + ORR r11, lr, r11, LSL #18 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [r0, #88] + STR lr, [r0, #92] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [r0, #96] + STR lr, [r0, #100] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [r0, #104] + STR lr, [r0, #108] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [r0, #112] + STR lr, [r0, #116] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [r0, #80] + STR lr, [r0, #84] + /* Row 3 */ + LDRD r2, r3, [sp, #32] + LDRD r4, r5, [sp, #40] + LDRD r6, r7, [sp, #88] + LDRD r8, r9, [sp, #136] + LDRD r10, r11, [sp, #184] + /* s[0] <<< 27 */ + LSR r12, r3, #5 
+ LSR lr, r2, #5 + ORR r2, r12, r2, LSL #27 + ORR r3, lr, r3, LSL #27 + /* s[1] <<< 36 */ + MOV lr, r4 + LSR r12, r5, #28 + LSR r4, r4, #28 + ORR r4, r4, r5, LSL #4 + ORR r5, r12, lr, LSL #4 + /* s[2] <<< 10 */ + LSR r12, r7, #22 + LSR lr, r6, #22 + ORR r6, r12, r6, LSL #10 + ORR r7, lr, r7, LSL #10 + /* s[3] <<< 15 */ + LSR r12, r9, #17 + LSR lr, r8, #17 + ORR r8, r12, r8, LSL #15 + ORR r9, lr, r9, LSL #15 + /* s[4] <<< 56 */ + MOV lr, r10 + LSR r12, r11, #8 + LSR r10, r10, #8 + ORR r10, r10, r11, LSL #24 + ORR r11, r12, lr, LSL #24 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [r0, #128] + STR lr, [r0, #132] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [r0, #136] + STR lr, [r0, #140] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [r0, #144] + STR lr, [r0, #148] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [r0, #152] + STR lr, [r0, #156] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [r0, #120] + STR lr, [r0, #124] + /* Row 4 */ + LDRD r2, r3, [sp, #16] + LDRD r4, r5, [sp, #64] + LDRD r6, r7, [sp, #112] + LDRD r8, r9, [sp, #120] + LDRD r10, r11, [sp, #168] + /* s[0] <<< 62 */ + MOV lr, r2 + LSR r12, r3, #2 + LSR r2, r2, #2 + ORR r2, r2, r3, LSL #30 + ORR r3, r12, lr, LSL #30 + /* s[1] <<< 55 */ + MOV lr, r4 + LSR r12, r5, #9 + LSR r4, r4, #9 + ORR r4, r4, r5, LSL #23 + ORR r5, r12, lr, LSL #23 + /* s[2] <<< 39 */ + MOV lr, r6 + LSR r12, r7, #25 + LSR r6, r6, #25 + ORR r6, r6, r7, LSL #7 + ORR r7, r12, lr, LSL #7 + /* s[3] <<< 41 */ + MOV lr, r8 + LSR r12, r9, #23 + LSR r8, r8, #23 + ORR r8, r8, r9, LSL #9 + ORR r9, r12, lr, LSL #9 + /* s[4] <<< 2 */ + LSR r12, r11, #30 + LSR lr, r10, #30 + ORR r10, r12, r10, LSL #2 + ORR r11, lr, r11, LSL #2 + BIC r12, r8, r6 + BIC lr, r9, r7 + EOR r12, r12, r4 + EOR lr, lr, r5 + STR r12, [r0, #168] + STR lr, [r0, #172] + BIC r12, r10, r8 + BIC lr, r11, r9 + EOR r12, r12, r6 + EOR lr, lr, r7 + STR r12, [r0, #176] + STR lr, [r0, #180] + BIC r12, r2, r10 + BIC lr, r3, r11 + EOR r12, r12, r8 + EOR lr, lr, r9 + STR r12, [r0, #184] + STR lr, [r0, #188] + BIC r12, r4, r2 + BIC lr, r5, r3 + EOR r12, r12, r10 + EOR lr, lr, r11 + STR r12, [r0, #192] + STR lr, [r0, #196] + BIC r12, r6, r4 + BIC lr, r7, r5 + EOR r12, r12, r2 + EOR lr, lr, r3 + STR r12, [r0, #160] + STR lr, [r0, #164] + LDR r2, [sp, #200] + SUBS r2, r2, #0x1 +#ifdef __GNUC__ + BNE L_sha3_thumb2_begin +#else + BNE.W L_sha3_thumb2_begin +#endif + ADD sp, sp, #0xcc + POP {r4, r5, r6, r7, r8, r9, r10, r11, pc} + /* Cycle Count = 1505 */ + .size BlockSha3,.-BlockSha3 +#endif /* !__aarch64__ && __thumb__ */ +#endif /* WOLFSSL_ARMASM */ + +#if defined(__linux__) && defined(__ELF__) +.section .note.GNU-stack,"",%progbits +#endif +#endif /* !WOLFSSL_ARMASM_INLINE */ diff --git a/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c new file mode 100644 index 0000000000..174f8aa334 --- /dev/null +++ b/wolfcrypt/src/port/arm/thumb2-sha3-asm_c.c @@ -0,0 +1,1170 @@ +/* thumb2-sha3-asm + * + * Copyright (C) 2006-2023 wolfSSL Inc. + * + * This file is part of wolfSSL. + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Generated using (from wolfssl): + * cd ../scripts + * ruby ./sha3/sha3.rb thumb2 ../wolfssl/wolfcrypt/src/port/arm/thumb2-sha3-asm.c + */ + +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <wolfssl/wolfcrypt/settings.h> + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__thumb__) +#include <stdint.h> +#ifdef HAVE_CONFIG_H + #include <config.h> +#endif /* HAVE_CONFIG_H */ +#include <wolfssl/wolfcrypt/settings.h> +#ifdef WOLFSSL_ARMASM_INLINE + +#ifdef WOLFSSL_ARMASM +#if !defined(__aarch64__) && defined(__thumb__) + +#ifdef __IAR_SYSTEMS_ICC__ +#define __asm__ asm +#define __volatile__ volatile +#define WOLFSSL_NO_VAR_ASSIGN_REG +#endif /* __IAR_SYSTEMS_ICC__ */ +#ifdef __KEIL__ +#define __asm__ __asm +#define __volatile__ volatile +#endif /* __KEIL__ */ +static const uint64_t L_sha3_thumb2_rt[] = { + 0x0000000000000001UL, 0x0000000000008082UL, + 0x800000000000808aUL, 0x8000000080008000UL, + 0x000000000000808bUL, 0x0000000080000001UL, + 0x8000000080008081UL, 0x8000000000008009UL, + 0x000000000000008aUL, 0x0000000000000088UL, + 0x0000000080008009UL, 0x000000008000000aUL, + 0x000000008000808bUL, 0x800000000000008bUL, + 0x8000000000008089UL, 0x8000000000008003UL, + 0x8000000000008002UL, 0x8000000000000080UL, + 0x000000000000800aUL, 0x800000008000000aUL, + 0x8000000080008081UL, 0x8000000000008080UL, + 0x0000000080000001UL, 0x8000000080008008UL, +}; + +#include <wolfssl/wolfcrypt/sha3.h> + +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG +void BlockSha3(word64* state_p) +#else +void BlockSha3(word64* state) +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ +{ +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + register word64* state __asm__ ("r0") = (word64*)state_p; + register uint64_t* L_sha3_thumb2_rt_c __asm__ ("r1") = (uint64_t*)&L_sha3_thumb2_rt; +#endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ + + __asm__ __volatile__ ( + "SUB sp, sp, #0xcc\n\t" + "MOV r1, %[L_sha3_thumb2_rt]\n\t" + "MOV r2, #0xc\n\t" + "\n" + "L_sha3_thumb2_begin%=:\n\t" + "STR r2, [sp, #200]\n\t" + /* Round even */ + /* Calc b[4] */ + "LDRD r4, r5, [%[state], #32]\n\t" + "LDRD r6, r7, [%[state], #72]\n\t" + "LDRD r8, r9, [%[state], #112]\n\t" + "LDRD r10, r11, [%[state], #152]\n\t" + "LDR r12, [%[state], #192]\n\t" + "LDR lr, [%[state], #196]\n\t" + "EOR r2, r4, r6\n\t" + "EOR r3, r5, r7\n\t" + "EOR r2, r2, r8\n\t" + "EOR r3, r3, r9\n\t" + "EOR r2, r2, r10\n\t" + "EOR r3, r3, r11\n\t" + "EOR r2, r2, r12\n\t" + "EOR r3, r3, lr\n\t" + "STRD r2, r3, [sp, #32]\n\t" + /* Calc b[1] */ + "LDRD r4, r5, [%[state], #8]\n\t" + "LDRD r6, r7, [%[state], #48]\n\t" + "LDRD r8, r9, [%[state], #88]\n\t" + "LDRD r10, r11, [%[state], #128]\n\t" + "LDR r12, [%[state], #168]\n\t" + "LDR lr, [%[state], #172]\n\t" + "EOR r4, r4, r6\n\t" + "EOR r5, r5, r7\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r4, r4, r10\n\t" + "EOR r5, r5, r11\n\t" + "EOR r4, r4, r12\n\t" + "EOR r5, r5, lr\n\t" + "STRD r4, r5, [sp, #8]\n\t" + /* Calc t[0] */ + "EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* Calc b[0] and XOR t[0] into s[x*5+0] */ + "LDRD r4, r5, [%[state]]\n\t" + "LDRD r6, r7, [%[state], #40]\n\t" + "LDRD r8,
r9, [%[state], #80]\n\t" + "LDRD r10, r11, [%[state], #120]\n\t" + "EOR r12, r4, r6\n\t" + "EOR lr, r5, r7\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r4, r5, [%[state]]\n\t" + "STRD r6, r7, [%[state], #40]\n\t" + "STRD r8, r9, [%[state], #80]\n\t" + "STRD r10, r11, [%[state], #120]\n\t" + "LDRD r10, r11, [%[state], #160]\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r10, r11, [%[state], #160]\n\t" + "STR r12, [sp]\n\t" + "STR lr, [sp, #4]\n\t" + /* Calc b[3] */ + "LDRD r4, r5, [%[state], #24]\n\t" + "LDRD r6, r7, [%[state], #64]\n\t" + "LDRD r8, r9, [%[state], #104]\n\t" + "LDRD r10, r11, [%[state], #144]\n\t" + "LDR r12, [%[state], #184]\n\t" + "LDR lr, [%[state], #188]\n\t" + "EOR r4, r4, r6\n\t" + "EOR r5, r5, r7\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r4, r4, r10\n\t" + "EOR r5, r5, r11\n\t" + "EOR r4, r4, r12\n\t" + "EOR r5, r5, lr\n\t" + "STRD r4, r5, [sp, #24]\n\t" + /* Calc t[2] */ + "LDRD r2, r3, [sp, #8]\n\t" + "EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* Calc b[2] and XOR t[2] into s[x*5+2] */ + "LDRD r4, r5, [%[state], #16]\n\t" + "LDRD r6, r7, [%[state], #56]\n\t" + "LDRD r8, r9, [%[state], #96]\n\t" + "LDRD r10, r11, [%[state], #136]\n\t" + "EOR r12, r4, r6\n\t" + "EOR lr, r5, r7\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r4, r5, [%[state], #16]\n\t" + "STRD r6, r7, [%[state], #56]\n\t" + "STRD r8, r9, [%[state], #96]\n\t" + "STRD r10, r11, [%[state], #136]\n\t" + "LDRD r10, r11, [%[state], #176]\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r10, r11, [%[state], #176]\n\t" + "STR r12, [sp, #16]\n\t" + "STR lr, [sp, #20]\n\t" + /* Calc t[1] */ + "LDRD r2, r3, [sp]\n\t" + "EOR r2, r2, lr, LSR #31\n\t" + "EOR r3, r3, r12, LSR #31\n\t" + "EOR r2, r2, r12, LSL #1\n\t" + "EOR r3, r3, lr, LSL #1\n\t" + /* XOR t[1] into s[x*5+1] */ + "LDRD r4, r5, [%[state], #8]\n\t" + "LDRD r6, r7, [%[state], #48]\n\t" + "LDRD r8, r9, [%[state], #88]\n\t" + "LDRD r10, r11, [%[state], #128]\n\t" + "LDR r12, [%[state], #168]\n\t" + "LDR lr, [%[state], #172]\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STRD r4, r5, [%[state], #8]\n\t" + "STRD r6, r7, [%[state], #48]\n\t" + "STRD r8, r9, [%[state], #88]\n\t" + "STRD r10, r11, [%[state], #128]\n\t" + "STR r12, [%[state], #168]\n\t" + "STR lr, [%[state], #172]\n\t" + /* Calc t[3] */ + "LDRD r2, r3, [sp, #16]\n\t" + "LDRD r4, r5, [sp, #32]\n\t" + "EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* XOR t[3] into s[x*5+3] */ + "LDRD r4, r5, [%[state], #24]\n\t" + "LDRD r6, r7, [%[state], #64]\n\t" + "LDRD r8, r9, [%[state], #104]\n\t" + "LDRD r10, r11, 
[%[state], #144]\n\t" + "LDR r12, [%[state], #184]\n\t" + "LDR lr, [%[state], #188]\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STRD r4, r5, [%[state], #24]\n\t" + "STRD r6, r7, [%[state], #64]\n\t" + "STRD r8, r9, [%[state], #104]\n\t" + "STRD r10, r11, [%[state], #144]\n\t" + "STR r12, [%[state], #184]\n\t" + "STR lr, [%[state], #188]\n\t" + /* Calc t[4] */ + "LDRD r2, r3, [sp, #24]\n\t" + "LDRD r4, r5, [sp]\n\t" + "EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* XOR t[4] into s[x*5+4] */ + "LDRD r4, r5, [%[state], #32]\n\t" + "LDRD r6, r7, [%[state], #72]\n\t" + "LDRD r8, r9, [%[state], #112]\n\t" + "LDRD r10, r11, [%[state], #152]\n\t" + "LDR r12, [%[state], #192]\n\t" + "LDR lr, [%[state], #196]\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STRD r4, r5, [%[state], #32]\n\t" + "STRD r6, r7, [%[state], #72]\n\t" + "STRD r8, r9, [%[state], #112]\n\t" + "STRD r10, r11, [%[state], #152]\n\t" + "STR r12, [%[state], #192]\n\t" + "STR lr, [%[state], #196]\n\t" + /* Row Mix */ + /* Row 0 */ + "LDRD r2, r3, [%[state]]\n\t" + "LDRD r4, r5, [%[state], #48]\n\t" + "LDRD r6, r7, [%[state], #96]\n\t" + "LDRD r8, r9, [%[state], #144]\n\t" + "LDRD r10, r11, [%[state], #192]\n\t" + /* s[1] <<< 44 */ + "MOV lr, r4\n\t" + "LSR r12, r5, #20\n\t" + "LSR r4, r4, #20\n\t" + "ORR r4, r4, r5, LSL #12\n\t" + "ORR r5, r12, lr, LSL #12\n\t" + /* s[2] <<< 43 */ + "MOV lr, r6\n\t" + "LSR r12, r7, #21\n\t" + "LSR r6, r6, #21\n\t" + "ORR r6, r6, r7, LSL #11\n\t" + "ORR r7, r12, lr, LSL #11\n\t" + /* s[3] <<< 21 */ + "LSR r12, r9, #11\n\t" + "LSR lr, r8, #11\n\t" + "ORR r8, r12, r8, LSL #21\n\t" + "ORR r9, lr, r9, LSL #21\n\t" + /* s[4] <<< 14 */ + "LSR r12, r11, #18\n\t" + "LSR lr, r10, #18\n\t" + "ORR r10, r12, r10, LSL #14\n\t" + "ORR r11, lr, r11, LSL #14\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [sp, #8]\n\t" + "STR lr, [sp, #12]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [sp, #16]\n\t" + "STR lr, [sp, #20]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [sp, #24]\n\t" + "STR lr, [sp, #28]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [sp, #32]\n\t" + "STR lr, [sp, #36]\n\t" + /* Get constant */ + "LDRD r10, r11, [r1]\n\t" + "ADD r1, r1, #0x8\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + /* XOR in constant */ + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [sp]\n\t" + "STR lr, [sp, #4]\n\t" + /* Row 1 */ + "LDRD r2, r3, [%[state], #24]\n\t" + "LDRD r4, r5, [%[state], #72]\n\t" + "LDRD r6, r7, [%[state], #80]\n\t" + "LDRD r8, r9, [%[state], #128]\n\t" + "LDRD r10, r11, [%[state], #176]\n\t" + /* s[0] <<< 28 */ + "LSR r12, r3, #4\n\t" + "LSR lr, r2, #4\n\t" + "ORR r2, r12, r2, LSL #28\n\t" + "ORR r3, lr, r3, LSL #28\n\t" + /* s[1] <<< 20 */ + "LSR r12, r5, #12\n\t" + "LSR lr, r4, #12\n\t" + "ORR r4, r12, r4, 
LSL #20\n\t" + "ORR r5, lr, r5, LSL #20\n\t" + /* s[2] <<< 3 */ + "LSR r12, r7, #29\n\t" + "LSR lr, r6, #29\n\t" + "ORR r6, r12, r6, LSL #3\n\t" + "ORR r7, lr, r7, LSL #3\n\t" + /* s[3] <<< 45 */ + "MOV lr, r8\n\t" + "LSR r12, r9, #19\n\t" + "LSR r8, r8, #19\n\t" + "ORR r8, r8, r9, LSL #13\n\t" + "ORR r9, r12, lr, LSL #13\n\t" + /* s[4] <<< 61 */ + "MOV lr, r10\n\t" + "LSR r12, r11, #3\n\t" + "LSR r10, r10, #3\n\t" + "ORR r10, r10, r11, LSL #29\n\t" + "ORR r11, r12, lr, LSL #29\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [sp, #48]\n\t" + "STR lr, [sp, #52]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [sp, #56]\n\t" + "STR lr, [sp, #60]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [sp, #64]\n\t" + "STR lr, [sp, #68]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [sp, #72]\n\t" + "STR lr, [sp, #76]\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STR r12, [sp, #40]\n\t" + "STR lr, [sp, #44]\n\t" + /* Row 2 */ + "LDRD r2, r3, [%[state], #8]\n\t" + "LDRD r4, r5, [%[state], #56]\n\t" + "LDRD r6, r7, [%[state], #104]\n\t" + "LDRD r8, r9, [%[state], #152]\n\t" + "LDRD r10, r11, [%[state], #160]\n\t" + /* s[0] <<< 1 */ + "LSR r12, r3, #31\n\t" + "LSR lr, r2, #31\n\t" + "ORR r2, r12, r2, LSL #1\n\t" + "ORR r3, lr, r3, LSL #1\n\t" + /* s[1] <<< 6 */ + "LSR r12, r5, #26\n\t" + "LSR lr, r4, #26\n\t" + "ORR r4, r12, r4, LSL #6\n\t" + "ORR r5, lr, r5, LSL #6\n\t" + /* s[2] <<< 25 */ + "LSR r12, r7, #7\n\t" + "LSR lr, r6, #7\n\t" + "ORR r6, r12, r6, LSL #25\n\t" + "ORR r7, lr, r7, LSL #25\n\t" + /* s[3] <<< 8 */ + "LSR r12, r9, #24\n\t" + "LSR lr, r8, #24\n\t" + "ORR r8, r12, r8, LSL #8\n\t" + "ORR r9, lr, r9, LSL #8\n\t" + /* s[4] <<< 18 */ + "LSR r12, r11, #14\n\t" + "LSR lr, r10, #14\n\t" + "ORR r10, r12, r10, LSL #18\n\t" + "ORR r11, lr, r11, LSL #18\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [sp, #88]\n\t" + "STR lr, [sp, #92]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [sp, #96]\n\t" + "STR lr, [sp, #100]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [sp, #104]\n\t" + "STR lr, [sp, #108]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [sp, #112]\n\t" + "STR lr, [sp, #116]\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STR r12, [sp, #80]\n\t" + "STR lr, [sp, #84]\n\t" + /* Row 3 */ + "LDRD r2, r3, [%[state], #32]\n\t" + "LDRD r4, r5, [%[state], #40]\n\t" + "LDRD r6, r7, [%[state], #88]\n\t" + "LDRD r8, r9, [%[state], #136]\n\t" + "LDRD r10, r11, [%[state], #184]\n\t" + /* s[0] <<< 27 */ + "LSR r12, r3, #5\n\t" + "LSR lr, r2, #5\n\t" + "ORR r2, r12, r2, LSL #27\n\t" + "ORR r3, lr, r3, LSL #27\n\t" + /* s[1] <<< 36 */ + "MOV lr, r4\n\t" + "LSR r12, r5, #28\n\t" + "LSR r4, r4, #28\n\t" + "ORR r4, r4, r5, LSL #4\n\t" + "ORR r5, r12, lr, LSL #4\n\t" + /* s[2] <<< 10 */ + "LSR r12, r7, #22\n\t" + "LSR lr, r6, #22\n\t" + "ORR r6, r12, r6, LSL #10\n\t" + "ORR r7, lr, r7, LSL #10\n\t" + /* s[3] <<< 15 */ + "LSR r12, r9, #17\n\t" + "LSR lr, r8, #17\n\t" + "ORR r8, 
r12, r8, LSL #15\n\t" + "ORR r9, lr, r9, LSL #15\n\t" + /* s[4] <<< 56 */ + "MOV lr, r10\n\t" + "LSR r12, r11, #8\n\t" + "LSR r10, r10, #8\n\t" + "ORR r10, r10, r11, LSL #24\n\t" + "ORR r11, r12, lr, LSL #24\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [sp, #128]\n\t" + "STR lr, [sp, #132]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [sp, #136]\n\t" + "STR lr, [sp, #140]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [sp, #144]\n\t" + "STR lr, [sp, #148]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [sp, #152]\n\t" + "STR lr, [sp, #156]\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STR r12, [sp, #120]\n\t" + "STR lr, [sp, #124]\n\t" + /* Row 4 */ + "LDRD r2, r3, [%[state], #16]\n\t" + "LDRD r4, r5, [%[state], #64]\n\t" + "LDRD r6, r7, [%[state], #112]\n\t" + "LDRD r8, r9, [%[state], #120]\n\t" + "LDRD r10, r11, [%[state], #168]\n\t" + /* s[0] <<< 62 */ + "MOV lr, r2\n\t" + "LSR r12, r3, #2\n\t" + "LSR r2, r2, #2\n\t" + "ORR r2, r2, r3, LSL #30\n\t" + "ORR r3, r12, lr, LSL #30\n\t" + /* s[1] <<< 55 */ + "MOV lr, r4\n\t" + "LSR r12, r5, #9\n\t" + "LSR r4, r4, #9\n\t" + "ORR r4, r4, r5, LSL #23\n\t" + "ORR r5, r12, lr, LSL #23\n\t" + /* s[2] <<< 39 */ + "MOV lr, r6\n\t" + "LSR r12, r7, #25\n\t" + "LSR r6, r6, #25\n\t" + "ORR r6, r6, r7, LSL #7\n\t" + "ORR r7, r12, lr, LSL #7\n\t" + /* s[3] <<< 41 */ + "MOV lr, r8\n\t" + "LSR r12, r9, #23\n\t" + "LSR r8, r8, #23\n\t" + "ORR r8, r8, r9, LSL #9\n\t" + "ORR r9, r12, lr, LSL #9\n\t" + /* s[4] <<< 2 */ + "LSR r12, r11, #30\n\t" + "LSR lr, r10, #30\n\t" + "ORR r10, r12, r10, LSL #2\n\t" + "ORR r11, lr, r11, LSL #2\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [sp, #168]\n\t" + "STR lr, [sp, #172]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [sp, #176]\n\t" + "STR lr, [sp, #180]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [sp, #184]\n\t" + "STR lr, [sp, #188]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [sp, #192]\n\t" + "STR lr, [sp, #196]\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STR r12, [sp, #160]\n\t" + "STR lr, [sp, #164]\n\t" + /* Round odd */ + /* Calc b[4] */ + "LDRD r4, r5, [sp, #32]\n\t" + "LDRD r6, r7, [sp, #72]\n\t" + "LDRD r8, r9, [sp, #112]\n\t" + "LDRD r10, r11, [sp, #152]\n\t" + "LDR r12, [sp, #192]\n\t" + "LDR lr, [sp, #196]\n\t" + "EOR r2, r4, r6\n\t" + "EOR r3, r5, r7\n\t" + "EOR r2, r2, r8\n\t" + "EOR r3, r3, r9\n\t" + "EOR r2, r2, r10\n\t" + "EOR r3, r3, r11\n\t" + "EOR r2, r2, r12\n\t" + "EOR r3, r3, lr\n\t" + "STRD r2, r3, [%[state], #32]\n\t" + /* Calc b[1] */ + "LDRD r4, r5, [sp, #8]\n\t" + "LDRD r6, r7, [sp, #48]\n\t" + "LDRD r8, r9, [sp, #88]\n\t" + "LDRD r10, r11, [sp, #128]\n\t" + "LDR r12, [sp, #168]\n\t" + "LDR lr, [sp, #172]\n\t" + "EOR r4, r4, r6\n\t" + "EOR r5, r5, r7\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r4, r4, r10\n\t" + "EOR r5, r5, r11\n\t" + "EOR r4, r4, r12\n\t" + "EOR r5, r5, lr\n\t" + "STRD r4, r5, [%[state], #8]\n\t" + /* Calc t[0] */ + 
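+ /* Editor's note (added): this "Calc t[0]" step forms the theta value
+  * t[0] = b[4] ^ rot64(b[1], 1) without a 64-bit rotate. With b[4] held
+  * in r2 (low)/r3 (high) and b[1] in r4 (low)/r5 (high), the 1-bit left
+  * rotate is lo' = (lo << 1) | (hi >> 31) and hi' = (hi << 1) | (lo >> 31),
+  * so the four shifted EORs fold the rotated b[1] straight into the
+  * b[4] pair. */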
"EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* Calc b[0] and XOR t[0] into s[x*5+0] */ + "LDRD r4, r5, [sp]\n\t" + "LDRD r6, r7, [sp, #40]\n\t" + "LDRD r8, r9, [sp, #80]\n\t" + "LDRD r10, r11, [sp, #120]\n\t" + "EOR r12, r4, r6\n\t" + "EOR lr, r5, r7\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r4, r5, [sp]\n\t" + "STRD r6, r7, [sp, #40]\n\t" + "STRD r8, r9, [sp, #80]\n\t" + "STRD r10, r11, [sp, #120]\n\t" + "LDRD r10, r11, [sp, #160]\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r10, r11, [sp, #160]\n\t" + "STR r12, [%[state]]\n\t" + "STR lr, [%[state], #4]\n\t" + /* Calc b[3] */ + "LDRD r4, r5, [sp, #24]\n\t" + "LDRD r6, r7, [sp, #64]\n\t" + "LDRD r8, r9, [sp, #104]\n\t" + "LDRD r10, r11, [sp, #144]\n\t" + "LDR r12, [sp, #184]\n\t" + "LDR lr, [sp, #188]\n\t" + "EOR r4, r4, r6\n\t" + "EOR r5, r5, r7\n\t" + "EOR r4, r4, r8\n\t" + "EOR r5, r5, r9\n\t" + "EOR r4, r4, r10\n\t" + "EOR r5, r5, r11\n\t" + "EOR r4, r4, r12\n\t" + "EOR r5, r5, lr\n\t" + "STRD r4, r5, [%[state], #24]\n\t" + /* Calc t[2] */ + "LDRD r2, r3, [%[state], #8]\n\t" + "EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* Calc b[2] and XOR t[2] into s[x*5+2] */ + "LDRD r4, r5, [sp, #16]\n\t" + "LDRD r6, r7, [sp, #56]\n\t" + "LDRD r8, r9, [sp, #96]\n\t" + "LDRD r10, r11, [sp, #136]\n\t" + "EOR r12, r4, r6\n\t" + "EOR lr, r5, r7\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r4, r5, [sp, #16]\n\t" + "STRD r6, r7, [sp, #56]\n\t" + "STRD r8, r9, [sp, #96]\n\t" + "STRD r10, r11, [sp, #136]\n\t" + "LDRD r10, r11, [sp, #176]\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "STRD r10, r11, [sp, #176]\n\t" + "STR r12, [%[state], #16]\n\t" + "STR lr, [%[state], #20]\n\t" + /* Calc t[1] */ + "LDRD r2, r3, [%[state]]\n\t" + "EOR r2, r2, lr, LSR #31\n\t" + "EOR r3, r3, r12, LSR #31\n\t" + "EOR r2, r2, r12, LSL #1\n\t" + "EOR r3, r3, lr, LSL #1\n\t" + /* XOR t[1] into s[x*5+1] */ + "LDRD r4, r5, [sp, #8]\n\t" + "LDRD r6, r7, [sp, #48]\n\t" + "LDRD r8, r9, [sp, #88]\n\t" + "LDRD r10, r11, [sp, #128]\n\t" + "LDR r12, [sp, #168]\n\t" + "LDR lr, [sp, #172]\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STRD r4, r5, [sp, #8]\n\t" + "STRD r6, r7, [sp, #48]\n\t" + "STRD r8, r9, [sp, #88]\n\t" + "STRD r10, r11, [sp, #128]\n\t" + "STR r12, [sp, #168]\n\t" + "STR lr, [sp, #172]\n\t" + /* Calc t[3] */ + "LDRD r2, r3, [%[state], #16]\n\t" + "LDRD r4, r5, [%[state], #32]\n\t" + "EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* XOR t[3] into s[x*5+3] */ + "LDRD r4, r5, [sp, #24]\n\t" + "LDRD r6, r7, [sp, 
#64]\n\t" + "LDRD r8, r9, [sp, #104]\n\t" + "LDRD r10, r11, [sp, #144]\n\t" + "LDR r12, [sp, #184]\n\t" + "LDR lr, [sp, #188]\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STRD r4, r5, [sp, #24]\n\t" + "STRD r6, r7, [sp, #64]\n\t" + "STRD r8, r9, [sp, #104]\n\t" + "STRD r10, r11, [sp, #144]\n\t" + "STR r12, [sp, #184]\n\t" + "STR lr, [sp, #188]\n\t" + /* Calc t[4] */ + "LDRD r2, r3, [%[state], #24]\n\t" + "LDRD r4, r5, [%[state]]\n\t" + "EOR r2, r2, r5, LSR #31\n\t" + "EOR r3, r3, r4, LSR #31\n\t" + "EOR r2, r2, r4, LSL #1\n\t" + "EOR r3, r3, r5, LSL #1\n\t" + /* XOR t[4] into s[x*5+4] */ + "LDRD r4, r5, [sp, #32]\n\t" + "LDRD r6, r7, [sp, #72]\n\t" + "LDRD r8, r9, [sp, #112]\n\t" + "LDRD r10, r11, [sp, #152]\n\t" + "LDR r12, [sp, #192]\n\t" + "LDR lr, [sp, #196]\n\t" + "EOR r4, r4, r2\n\t" + "EOR r5, r5, r3\n\t" + "EOR r6, r6, r2\n\t" + "EOR r7, r7, r3\n\t" + "EOR r8, r8, r2\n\t" + "EOR r9, r9, r3\n\t" + "EOR r10, r10, r2\n\t" + "EOR r11, r11, r3\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STRD r4, r5, [sp, #32]\n\t" + "STRD r6, r7, [sp, #72]\n\t" + "STRD r8, r9, [sp, #112]\n\t" + "STRD r10, r11, [sp, #152]\n\t" + "STR r12, [sp, #192]\n\t" + "STR lr, [sp, #196]\n\t" + /* Row Mix */ + /* Row 0 */ + "LDRD r2, r3, [sp]\n\t" + "LDRD r4, r5, [sp, #48]\n\t" + "LDRD r6, r7, [sp, #96]\n\t" + "LDRD r8, r9, [sp, #144]\n\t" + "LDRD r10, r11, [sp, #192]\n\t" + /* s[1] <<< 44 */ + "MOV lr, r4\n\t" + "LSR r12, r5, #20\n\t" + "LSR r4, r4, #20\n\t" + "ORR r4, r4, r5, LSL #12\n\t" + "ORR r5, r12, lr, LSL #12\n\t" + /* s[2] <<< 43 */ + "MOV lr, r6\n\t" + "LSR r12, r7, #21\n\t" + "LSR r6, r6, #21\n\t" + "ORR r6, r6, r7, LSL #11\n\t" + "ORR r7, r12, lr, LSL #11\n\t" + /* s[3] <<< 21 */ + "LSR r12, r9, #11\n\t" + "LSR lr, r8, #11\n\t" + "ORR r8, r12, r8, LSL #21\n\t" + "ORR r9, lr, r9, LSL #21\n\t" + /* s[4] <<< 14 */ + "LSR r12, r11, #18\n\t" + "LSR lr, r10, #18\n\t" + "ORR r10, r12, r10, LSL #14\n\t" + "ORR r11, lr, r11, LSL #14\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [%[state], #8]\n\t" + "STR lr, [%[state], #12]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [%[state], #16]\n\t" + "STR lr, [%[state], #20]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [%[state], #24]\n\t" + "STR lr, [%[state], #28]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [%[state], #32]\n\t" + "STR lr, [%[state], #36]\n\t" + /* Get constant */ + "LDRD r10, r11, [r1]\n\t" + "ADD r1, r1, #0x8\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + /* XOR in constant */ + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [%[state]]\n\t" + "STR lr, [%[state], #4]\n\t" + /* Row 1 */ + "LDRD r2, r3, [sp, #24]\n\t" + "LDRD r4, r5, [sp, #72]\n\t" + "LDRD r6, r7, [sp, #80]\n\t" + "LDRD r8, r9, [sp, #128]\n\t" + "LDRD r10, r11, [sp, #176]\n\t" + /* s[0] <<< 28 */ + "LSR r12, r3, #4\n\t" + "LSR lr, r2, #4\n\t" + "ORR r2, r12, r2, LSL #28\n\t" + "ORR r3, lr, r3, LSL #28\n\t" + /* s[1] <<< 20 */ + "LSR r12, r5, #12\n\t" + "LSR lr, r4, #12\n\t" + "ORR r4, r12, r4, LSL #20\n\t" + "ORR r5, lr, r5, LSL #20\n\t" + /* 
+        "LSR r12, r7, #29\n\t"
+        "LSR lr, r6, #29\n\t"
+        "ORR r6, r12, r6, LSL #3\n\t"
+        "ORR r7, lr, r7, LSL #3\n\t"
+        /* s[3] <<< 45 */
+        "MOV lr, r8\n\t"
+        "LSR r12, r9, #19\n\t"
+        "LSR r8, r8, #19\n\t"
+        "ORR r8, r8, r9, LSL #13\n\t"
+        "ORR r9, r12, lr, LSL #13\n\t"
+        /* s[4] <<< 61 */
+        "MOV lr, r10\n\t"
+        "LSR r12, r11, #3\n\t"
+        "LSR r10, r10, #3\n\t"
+        "ORR r10, r10, r11, LSL #29\n\t"
+        "ORR r11, r12, lr, LSL #29\n\t"
+        "BIC r12, r8, r6\n\t"
+        "BIC lr, r9, r7\n\t"
+        "EOR r12, r12, r4\n\t"
+        "EOR lr, lr, r5\n\t"
+        "STR r12, [%[state], #48]\n\t"
+        "STR lr, [%[state], #52]\n\t"
+        "BIC r12, r10, r8\n\t"
+        "BIC lr, r11, r9\n\t"
+        "EOR r12, r12, r6\n\t"
+        "EOR lr, lr, r7\n\t"
+        "STR r12, [%[state], #56]\n\t"
+        "STR lr, [%[state], #60]\n\t"
+        "BIC r12, r2, r10\n\t"
+        "BIC lr, r3, r11\n\t"
+        "EOR r12, r12, r8\n\t"
+        "EOR lr, lr, r9\n\t"
+        "STR r12, [%[state], #64]\n\t"
+        "STR lr, [%[state], #68]\n\t"
+        "BIC r12, r4, r2\n\t"
+        "BIC lr, r5, r3\n\t"
+        "EOR r12, r12, r10\n\t"
+        "EOR lr, lr, r11\n\t"
+        "STR r12, [%[state], #72]\n\t"
+        "STR lr, [%[state], #76]\n\t"
+        "BIC r12, r6, r4\n\t"
+        "BIC lr, r7, r5\n\t"
+        "EOR r12, r12, r2\n\t"
+        "EOR lr, lr, r3\n\t"
+        "STR r12, [%[state], #40]\n\t"
+        "STR lr, [%[state], #44]\n\t"
+        /* Row 2 */
+        "LDRD r2, r3, [sp, #8]\n\t"
+        "LDRD r4, r5, [sp, #56]\n\t"
+        "LDRD r6, r7, [sp, #104]\n\t"
+        "LDRD r8, r9, [sp, #152]\n\t"
+        "LDRD r10, r11, [sp, #160]\n\t"
+        /* s[0] <<< 1 */
+        "LSR r12, r3, #31\n\t"
+        "LSR lr, r2, #31\n\t"
+        "ORR r2, r12, r2, LSL #1\n\t"
+        "ORR r3, lr, r3, LSL #1\n\t"
+        /* s[1] <<< 6 */
+        "LSR r12, r5, #26\n\t"
+        "LSR lr, r4, #26\n\t"
+        "ORR r4, r12, r4, LSL #6\n\t"
+        "ORR r5, lr, r5, LSL #6\n\t"
+        /* s[2] <<< 25 */
+        "LSR r12, r7, #7\n\t"
+        "LSR lr, r6, #7\n\t"
+        "ORR r6, r12, r6, LSL #25\n\t"
+        "ORR r7, lr, r7, LSL #25\n\t"
+        /* s[3] <<< 8 */
+        "LSR r12, r9, #24\n\t"
+        "LSR lr, r8, #24\n\t"
+        "ORR r8, r12, r8, LSL #8\n\t"
+        "ORR r9, lr, r9, LSL #8\n\t"
+        /* s[4] <<< 18 */
+        "LSR r12, r11, #14\n\t"
+        "LSR lr, r10, #14\n\t"
+        "ORR r10, r12, r10, LSL #18\n\t"
+        "ORR r11, lr, r11, LSL #18\n\t"
+        "BIC r12, r8, r6\n\t"
+        "BIC lr, r9, r7\n\t"
+        "EOR r12, r12, r4\n\t"
+        "EOR lr, lr, r5\n\t"
+        "STR r12, [%[state], #88]\n\t"
+        "STR lr, [%[state], #92]\n\t"
+        "BIC r12, r10, r8\n\t"
+        "BIC lr, r11, r9\n\t"
+        "EOR r12, r12, r6\n\t"
+        "EOR lr, lr, r7\n\t"
+        "STR r12, [%[state], #96]\n\t"
+        "STR lr, [%[state], #100]\n\t"
+        "BIC r12, r2, r10\n\t"
+        "BIC lr, r3, r11\n\t"
+        "EOR r12, r12, r8\n\t"
+        "EOR lr, lr, r9\n\t"
+        "STR r12, [%[state], #104]\n\t"
+        "STR lr, [%[state], #108]\n\t"
+        "BIC r12, r4, r2\n\t"
+        "BIC lr, r5, r3\n\t"
+        "EOR r12, r12, r10\n\t"
+        "EOR lr, lr, r11\n\t"
+        "STR r12, [%[state], #112]\n\t"
+        "STR lr, [%[state], #116]\n\t"
+        "BIC r12, r6, r4\n\t"
+        "BIC lr, r7, r5\n\t"
+        "EOR r12, r12, r2\n\t"
+        "EOR lr, lr, r3\n\t"
+        "STR r12, [%[state], #80]\n\t"
+        "STR lr, [%[state], #84]\n\t"
+        /* Row 3 */
+        "LDRD r2, r3, [sp, #32]\n\t"
+        "LDRD r4, r5, [sp, #40]\n\t"
+        "LDRD r6, r7, [sp, #88]\n\t"
+        "LDRD r8, r9, [sp, #136]\n\t"
+        "LDRD r10, r11, [sp, #184]\n\t"
+        /* s[0] <<< 27 */
+        "LSR r12, r3, #5\n\t"
+        "LSR lr, r2, #5\n\t"
+        "ORR r2, r12, r2, LSL #27\n\t"
+        "ORR r3, lr, r3, LSL #27\n\t"
+        /* s[1] <<< 36 */
+        "MOV lr, r4\n\t"
+        "LSR r12, r5, #28\n\t"
+        "LSR r4, r4, #28\n\t"
+        "ORR r4, r4, r5, LSL #4\n\t"
+        "ORR r5, r12, lr, LSL #4\n\t"
+        /* s[2] <<< 10 */
+        "LSR r12, r7, #22\n\t"
+        "LSR lr, r6, #22\n\t"
+        "ORR r6, r12, r6, LSL #10\n\t"
+        "ORR r7, lr, r7, LSL #10\n\t"
+        /* s[3] <<< 15 */
+        "LSR r12, r9, #17\n\t"
+        "LSR lr, r8, #17\n\t"
"ORR r8, r12, r8, LSL #15\n\t" + "ORR r9, lr, r9, LSL #15\n\t" + /* s[4] <<< 56 */ + "MOV lr, r10\n\t" + "LSR r12, r11, #8\n\t" + "LSR r10, r10, #8\n\t" + "ORR r10, r10, r11, LSL #24\n\t" + "ORR r11, r12, lr, LSL #24\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [%[state], #128]\n\t" + "STR lr, [%[state], #132]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [%[state], #136]\n\t" + "STR lr, [%[state], #140]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [%[state], #144]\n\t" + "STR lr, [%[state], #148]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [%[state], #152]\n\t" + "STR lr, [%[state], #156]\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STR r12, [%[state], #120]\n\t" + "STR lr, [%[state], #124]\n\t" + /* Row 4 */ + "LDRD r2, r3, [sp, #16]\n\t" + "LDRD r4, r5, [sp, #64]\n\t" + "LDRD r6, r7, [sp, #112]\n\t" + "LDRD r8, r9, [sp, #120]\n\t" + "LDRD r10, r11, [sp, #168]\n\t" + /* s[0] <<< 62 */ + "MOV lr, r2\n\t" + "LSR r12, r3, #2\n\t" + "LSR r2, r2, #2\n\t" + "ORR r2, r2, r3, LSL #30\n\t" + "ORR r3, r12, lr, LSL #30\n\t" + /* s[1] <<< 55 */ + "MOV lr, r4\n\t" + "LSR r12, r5, #9\n\t" + "LSR r4, r4, #9\n\t" + "ORR r4, r4, r5, LSL #23\n\t" + "ORR r5, r12, lr, LSL #23\n\t" + /* s[2] <<< 39 */ + "MOV lr, r6\n\t" + "LSR r12, r7, #25\n\t" + "LSR r6, r6, #25\n\t" + "ORR r6, r6, r7, LSL #7\n\t" + "ORR r7, r12, lr, LSL #7\n\t" + /* s[3] <<< 41 */ + "MOV lr, r8\n\t" + "LSR r12, r9, #23\n\t" + "LSR r8, r8, #23\n\t" + "ORR r8, r8, r9, LSL #9\n\t" + "ORR r9, r12, lr, LSL #9\n\t" + /* s[4] <<< 2 */ + "LSR r12, r11, #30\n\t" + "LSR lr, r10, #30\n\t" + "ORR r10, r12, r10, LSL #2\n\t" + "ORR r11, lr, r11, LSL #2\n\t" + "BIC r12, r8, r6\n\t" + "BIC lr, r9, r7\n\t" + "EOR r12, r12, r4\n\t" + "EOR lr, lr, r5\n\t" + "STR r12, [%[state], #168]\n\t" + "STR lr, [%[state], #172]\n\t" + "BIC r12, r10, r8\n\t" + "BIC lr, r11, r9\n\t" + "EOR r12, r12, r6\n\t" + "EOR lr, lr, r7\n\t" + "STR r12, [%[state], #176]\n\t" + "STR lr, [%[state], #180]\n\t" + "BIC r12, r2, r10\n\t" + "BIC lr, r3, r11\n\t" + "EOR r12, r12, r8\n\t" + "EOR lr, lr, r9\n\t" + "STR r12, [%[state], #184]\n\t" + "STR lr, [%[state], #188]\n\t" + "BIC r12, r4, r2\n\t" + "BIC lr, r5, r3\n\t" + "EOR r12, r12, r10\n\t" + "EOR lr, lr, r11\n\t" + "STR r12, [%[state], #192]\n\t" + "STR lr, [%[state], #196]\n\t" + "BIC r12, r6, r4\n\t" + "BIC lr, r7, r5\n\t" + "EOR r12, r12, r2\n\t" + "EOR lr, lr, r3\n\t" + "STR r12, [%[state], #160]\n\t" + "STR lr, [%[state], #164]\n\t" + "LDR r2, [sp, #200]\n\t" + "SUBS r2, r2, #0x1\n\t" +#ifdef __GNUC__ + "BNE L_sha3_thumb2_begin%=\n\t" +#else + "BNE.W L_sha3_thumb2_begin%=\n\t" +#endif + "ADD sp, sp, #0xcc\n\t" +#ifndef WOLFSSL_NO_VAR_ASSIGN_REG + : [state] "+r" (state), + [L_sha3_thumb2_rt] "+r" (L_sha3_thumb2_rt_c) + : + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" +#else + : [state] "+r" (state) + : [L_sha3_thumb2_rt] "r" (L_sha3_thumb2_rt) + : "memory", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "lr", "cc" +#endif /* WOLFSSL_NO_VAR_ASSIGN_REG */ + ); +} + +#endif /* !__aarch64__ && __thumb__ */ +#endif /* WOLFSSL_ARMASM */ +#endif /* !defined(__aarch64__) && defined(__thumb__) */ +#endif /* WOLFSSL_ARMASM */ + +#endif /* 
diff --git a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S
index 6031b92404..4723ad6ac6 100644
--- a/wolfcrypt/src/port/arm/thumb2-sha512-asm.S
+++ b/wolfcrypt/src/port/arm/thumb2-sha512-asm.S
@@ -39,7 +39,7 @@
 	.text
 	.type	L_SHA512_transform_len_k, %object
 	.size	L_SHA512_transform_len_k, 640
-	.align	4
+	.align	8
 L_SHA512_transform_len_k:
 	.word	0xd728ae22
 	.word	0x428a2f98
diff --git a/wolfcrypt/src/sha3.c b/wolfcrypt/src/sha3.c
index 57b8d2eb4f..9f966973b5 100644
--- a/wolfcrypt/src/sha3.c
+++ b/wolfcrypt/src/sha3.c
@@ -59,7 +59,8 @@
 }
 #endif

-#if !defined(WOLFSSL_ARMASM) || !defined(WOLFSSL_ARMASM_CRYPTO_SHA3)
+#if !defined(WOLFSSL_ARMASM) || (!defined(__arm__) && \
+    !defined(WOLFSSL_ARMASM_CRYPTO_SHA3))

 #ifdef USE_INTEL_SPEEDUP
     #include
diff --git a/wolfssl/wolfcrypt/sha3.h b/wolfssl/wolfcrypt/sha3.h
index 149c714c49..bbe1162879 100644
--- a/wolfssl/wolfcrypt/sha3.h
+++ b/wolfssl/wolfcrypt/sha3.h
@@ -216,7 +216,8 @@
 WOLFSSL_LOCAL void sha3_block_bmi2(word64* s);
 WOLFSSL_LOCAL void sha3_block_avx2(word64* s);
 WOLFSSL_LOCAL void BlockSha3(word64 *s);
 #endif
-#if defined(WOLFSSL_ARMASM) && defined(WOLFSSL_ARMASM_CRYPTO_SHA3)
+#if defined(WOLFSSL_ARMASM) && (defined(__arm__) || \
+    defined(WOLFSSL_ARMASM_CRYPTO_SHA3))
 WOLFSSL_LOCAL void BlockSha3(word64 *s);
 #endif
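[Editor's note, not part of the patch: the sha3.c guard is the De Morgan
negation of the sha3.h guard, so exactly one BlockSha3() definition is
compiled for any configuration. GCC and Clang define __arm__ for both ARM
and Thumb state on 32-bit targets, so the new condition covers the Thumb2
and ARM32 files alike. A sketch of the resulting selection, with guard text
taken from the hunks above and comments added for illustration:

    #if defined(WOLFSSL_ARMASM) && (defined(__arm__) || \
        defined(WOLFSSL_ARMASM_CRYPTO_SHA3))
        /* BlockSha3() comes from the assembly sources added in this patch
         * (thumb2-sha3-asm*, armv8-32-sha3-asm*) or, on AArch64 with the
         * SHA-3 crypto extensions, from the existing armv8-sha3-asm. */
    #else
        /* Otherwise sha3.c compiles its portable C BlockSha3(). */
    #endif
]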