From 62b9b355f238668d7289c24b58238ac500f0be3b Mon Sep 17 00:00:00 2001 From: Yan Peng <112029182+pennyannn@users.noreply.github.com> Date: Tue, 6 Aug 2024 12:55:23 -0700 Subject: [PATCH] Update aes_hw_ctr32_encrypt_blocks (#63) ### Description: This PR updates program and tests for `aes_hw_ctr32_encrypt_blocks` based on the fix of a bug from the PR https://github.com/aws/aws-lc/pull/1690 ### Testing: What tests have been run? Did `make all` succeed for your changes? Was conformance testing successful on an Aarch64 machine? Yes for both. ### License: By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. --- .../AESHWCtr32EncryptBlocksProgram.lean | 332 +++++++++--------- Tests/AES-GCM/AESV8ProgramTests.lean | 22 +- 2 files changed, 176 insertions(+), 178 deletions(-) diff --git a/Tests/AES-GCM/AESHWCtr32EncryptBlocksProgram.lean b/Tests/AES-GCM/AESHWCtr32EncryptBlocksProgram.lean index 0a545107..61371e97 100644 --- a/Tests/AES-GCM/AESHWCtr32EncryptBlocksProgram.lean +++ b/Tests/AES-GCM/AESHWCtr32EncryptBlocksProgram.lean @@ -24,175 +24,175 @@ open BitVec def aes_hw_ctr32_encrypt_blocks_program : Program := def_program - [ -- 000000000079f8a0 : - (0x79f8a0#64, 0xa9bf7bfd#32), -- stp x29, x30, [sp, #-16]! - (0x79f8a4#64, 0x910003fd#32), -- mov x29, sp - (0x79f8a8#64, 0xb940f065#32), -- ldr w5, [x3, #240] - (0x79f8ac#64, 0xb9400c88#32), -- ldr w8, [x4, #12] - (0x79f8b0#64, 0x4c407880#32), -- ld1 {v0.4s}, [x4] - (0x79f8b4#64, 0x4c40a870#32), -- ld1 {v16.4s, v17.4s}, [x3] - (0x79f8b8#64, 0x510010a5#32), -- sub w5, w5, #0x4 - (0x79f8bc#64, 0xd280020c#32), -- mov x12, #0x10 // #16 - (0x79f8c0#64, 0xf100085f#32), -- cmp x2, #0x2 - (0x79f8c4#64, 0x8b051067#32), -- add x7, x3, x5, lsl #4 - (0x79f8c8#64, 0x510008a5#32), -- sub w5, w5, #0x2 - (0x79f8cc#64, 0x4cdfa8f4#32), -- ld1 {v20.4s, v21.4s}, [x7], #32 - (0x79f8d0#64, 0x4cdfa8f6#32), -- ld1 {v22.4s, v23.4s}, [x7], #32 - (0x79f8d4#64, 0x4c4078e7#32), -- ld1 {v7.4s}, [x7] - (0x79f8d8#64, 0x91008067#32), -- add x7, x3, #0x20 - (0x79f8dc#64, 0x2a0503e6#32), -- mov w6, w5 - (0x79f8e0#64, 0x9a8c33ec#32), -- csel x12, xzr, x12, cc // cc = lo, ul, last - (0x79f8e4#64, 0x5ac00908#32), -- rev w8, w8 - (0x79f8e8#64, 0x1100050a#32), -- add w10, w8, #0x1 - (0x79f8ec#64, 0x4ea01c06#32), -- mov v6.16b, v0.16b - (0x79f8f0#64, 0x5ac0094a#32), -- rev w10, w10 - (0x79f8f4#64, 0x4e1c1d46#32), -- mov v6.s[3], w10 - (0x79f8f8#64, 0x11000908#32), -- add w8, w8, #0x2 - (0x79f8fc#64, 0x4ea61cc1#32), -- mov v1.16b, v6.16b - (0x79f900#64, 0x54000b89#32), -- b.ls 79fa70 <.Lctr32_tail> // b.plast - (0x79f904#64, 0x5ac0090c#32), -- rev w12, w8 - (0x79f908#64, 0x4e1c1d86#32), -- mov v6.s[3], w12 - (0x79f90c#64, 0xd1000c42#32), -- sub x2, x2, #0x3 - (0x79f910#64, 0x4ea61cd2#32), -- mov v18.16b, v6.16b - (0x79f914#64, 0x14000003#32), -- b 79f920 <.Loop3x_ctr32> - (0x79f918#64, 0xd503201f#32), -- nop - (0x79f91c#64, 0xd503201f#32), -- nop + [ -- 00000000007d2680 : + (0x7d2680#64, 0xa9bf7bfd#32), -- stp x29, x30, [sp, #-16]! + (0x7d2684#64, 0x910003fd#32), -- mov x29, sp + (0x7d2688#64, 0xb940f065#32), -- ldr w5, [x3, #240] + (0x7d268c#64, 0xb9400c88#32), -- ldr w8, [x4, #12] + (0x7d2690#64, 0x4c407880#32), -- ld1 {v0.4s}, [x4] + (0x7d2694#64, 0x4c40a870#32), -- ld1 {v16.4s, v17.4s}, [x3] + (0x7d2698#64, 0x510010a5#32), -- sub w5, w5, #0x4 + (0x7d269c#64, 0xd280020c#32), -- mov x12, #0x10 // #16 + (0x7d26a0#64, 0xf100085f#32), -- cmp x2, #0x2 + (0x7d26a4#64, 0x8b051067#32), -- add x7, x3, x5, lsl #4 + (0x7d26a8#64, 0x510008a5#32), -- sub w5, w5, #0x2 + (0x7d26ac#64, 0x4cdfa8f4#32), -- ld1 {v20.4s, v21.4s}, [x7], #32 + (0x7d26b0#64, 0x4cdfa8f6#32), -- ld1 {v22.4s, v23.4s}, [x7], #32 + (0x7d26b4#64, 0x4c4078e7#32), -- ld1 {v7.4s}, [x7] + (0x7d26b8#64, 0x91008067#32), -- add x7, x3, #0x20 + (0x7d26bc#64, 0x2a0503e6#32), -- mov w6, w5 + (0x7d26c0#64, 0x5ac00908#32), -- rev w8, w8 + (0x7d26c4#64, 0x1100050a#32), -- add w10, w8, #0x1 + (0x7d26c8#64, 0x4ea01c06#32), -- mov v6.16b, v0.16b + (0x7d26cc#64, 0x5ac0094a#32), -- rev w10, w10 + (0x7d26d0#64, 0x4e1c1d46#32), -- mov v6.s[3], w10 + (0x7d26d4#64, 0x11000908#32), -- add w8, w8, #0x2 + (0x7d26d8#64, 0x4ea61cc1#32), -- mov v1.16b, v6.16b + (0x7d26dc#64, 0x54000b49#32), -- b.ls 7d2844 <.Lctr32_tail> // b.plast + (0x7d26e0#64, 0x5ac0090c#32), -- rev w12, w8 + (0x7d26e4#64, 0x4e1c1d86#32), -- mov v6.s[3], w12 + (0x7d26e8#64, 0xd1000c42#32), -- sub x2, x2, #0x3 + (0x7d26ec#64, 0x4ea61cd2#32), -- mov v18.16b, v6.16b + (0x7d26f0#64, 0x14000004#32), -- b 7d2700 <.Loop3x_ctr32> + (0x7d26f4#64, 0xd503201f#32), -- nop + (0x7d26f8#64, 0xd503201f#32), -- nop + (0x7d26fc#64, 0xd503201f#32), -- nop - -- 000000000079f920 <.Loop3x_ctr32>: - (0x79f920#64, 0x4e284a00#32), -- aese v0.16b, v16.16b - (0x79f924#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79f928#64, 0x4e284a01#32), -- aese v1.16b, v16.16b - (0x79f92c#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79f930#64, 0x4e284a12#32), -- aese v18.16b, v16.16b - (0x79f934#64, 0x4e286a52#32), -- aesmc v18.16b, v18.16b - (0x79f938#64, 0x4cdf78f0#32), -- ld1 {v16.4s}, [x7], #16 - (0x79f93c#64, 0x710008c6#32), -- subs w6, w6, #0x2 - (0x79f940#64, 0x4e284a20#32), -- aese v0.16b, v17.16b - (0x79f944#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79f948#64, 0x4e284a21#32), -- aese v1.16b, v17.16b - (0x79f94c#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79f950#64, 0x4e284a32#32), -- aese v18.16b, v17.16b - (0x79f954#64, 0x4e286a52#32), -- aesmc v18.16b, v18.16b - (0x79f958#64, 0x4cdf78f1#32), -- ld1 {v17.4s}, [x7], #16 - (0x79f95c#64, 0x54fffe2c#32), -- b.gt 79f920 <.Loop3x_ctr32> - (0x79f960#64, 0x4e284a00#32), -- aese v0.16b, v16.16b - (0x79f964#64, 0x4e286804#32), -- aesmc v4.16b, v0.16b - (0x79f968#64, 0x4e284a01#32), -- aese v1.16b, v16.16b - (0x79f96c#64, 0x4e286825#32), -- aesmc v5.16b, v1.16b - (0x79f970#64, 0x4cdf7002#32), -- ld1 {v2.16b}, [x0], #16 - (0x79f974#64, 0x11000509#32), -- add w9, w8, #0x1 - (0x79f978#64, 0x4e284a12#32), -- aese v18.16b, v16.16b - (0x79f97c#64, 0x4e286a52#32), -- aesmc v18.16b, v18.16b - (0x79f980#64, 0x4cdf7003#32), -- ld1 {v3.16b}, [x0], #16 - (0x79f984#64, 0x5ac00929#32), -- rev w9, w9 - (0x79f988#64, 0x4e284a24#32), -- aese v4.16b, v17.16b - (0x79f98c#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b - (0x79f990#64, 0x4e284a25#32), -- aese v5.16b, v17.16b - (0x79f994#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b - (0x79f998#64, 0x4cdf7013#32), -- ld1 {v19.16b}, [x0], #16 - (0x79f99c#64, 0xaa0303e7#32), -- mov x7, x3 - (0x79f9a0#64, 0x4e284a32#32), -- aese v18.16b, v17.16b - (0x79f9a4#64, 0x4e286a51#32), -- aesmc v17.16b, v18.16b - (0x79f9a8#64, 0x4e284a84#32), -- aese v4.16b, v20.16b - (0x79f9ac#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b - (0x79f9b0#64, 0x4e284a85#32), -- aese v5.16b, v20.16b - (0x79f9b4#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b - (0x79f9b8#64, 0x6e271c42#32), -- eor v2.16b, v2.16b, v7.16b - (0x79f9bc#64, 0x1100090a#32), -- add w10, w8, #0x2 - (0x79f9c0#64, 0x4e284a91#32), -- aese v17.16b, v20.16b - (0x79f9c4#64, 0x4e286a31#32), -- aesmc v17.16b, v17.16b - (0x79f9c8#64, 0x6e271c63#32), -- eor v3.16b, v3.16b, v7.16b - (0x79f9cc#64, 0x11000d08#32), -- add w8, w8, #0x3 - (0x79f9d0#64, 0x4e284aa4#32), -- aese v4.16b, v21.16b - (0x79f9d4#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b - (0x79f9d8#64, 0x4e284aa5#32), -- aese v5.16b, v21.16b - (0x79f9dc#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b - (0x79f9e0#64, 0x6e271e73#32), -- eor v19.16b, v19.16b, v7.16b - (0x79f9e4#64, 0x4e1c1d26#32), -- mov v6.s[3], w9 - (0x79f9e8#64, 0x4e284ab1#32), -- aese v17.16b, v21.16b - (0x79f9ec#64, 0x4e286a31#32), -- aesmc v17.16b, v17.16b - (0x79f9f0#64, 0x4ea61cc0#32), -- mov v0.16b, v6.16b - (0x79f9f4#64, 0x5ac0094a#32), -- rev w10, w10 - (0x79f9f8#64, 0x4e284ac4#32), -- aese v4.16b, v22.16b - (0x79f9fc#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b - (0x79fa00#64, 0x4e1c1d46#32), -- mov v6.s[3], w10 - (0x79fa04#64, 0x5ac0090c#32), -- rev w12, w8 - (0x79fa08#64, 0x4e284ac5#32), -- aese v5.16b, v22.16b - (0x79fa0c#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b - (0x79fa10#64, 0x4ea61cc1#32), -- mov v1.16b, v6.16b - (0x79fa14#64, 0x4e1c1d86#32), -- mov v6.s[3], w12 - (0x79fa18#64, 0x4e284ad1#32), -- aese v17.16b, v22.16b - (0x79fa1c#64, 0x4e286a31#32), -- aesmc v17.16b, v17.16b - (0x79fa20#64, 0x4ea61cd2#32), -- mov v18.16b, v6.16b - (0x79fa24#64, 0xf1000c42#32), -- subs x2, x2, #0x3 - (0x79fa28#64, 0x4e284ae4#32), -- aese v4.16b, v23.16b - (0x79fa2c#64, 0x4e284ae5#32), -- aese v5.16b, v23.16b - (0x79fa30#64, 0x4e284af1#32), -- aese v17.16b, v23.16b - (0x79fa34#64, 0x6e241c42#32), -- eor v2.16b, v2.16b, v4.16b - (0x79fa38#64, 0x4cdf78f0#32), -- ld1 {v16.4s}, [x7], #16 - (0x79fa3c#64, 0x4c9f7022#32), -- st1 {v2.16b}, [x1], #16 - (0x79fa40#64, 0x6e251c63#32), -- eor v3.16b, v3.16b, v5.16b - (0x79fa44#64, 0x2a0503e6#32), -- mov w6, w5 - (0x79fa48#64, 0x4c9f7023#32), -- st1 {v3.16b}, [x1], #16 - (0x79fa4c#64, 0x6e311e73#32), -- eor v19.16b, v19.16b, v17.16b - (0x79fa50#64, 0x4cdf78f1#32), -- ld1 {v17.4s}, [x7], #16 - (0x79fa54#64, 0x4c9f7033#32), -- st1 {v19.16b}, [x1], #16 - (0x79fa58#64, 0x54fff642#32), -- b.cs 79f920 <.Loop3x_ctr32> // b.hs, b.nlast - (0x79fa5c#64, 0xb1000c42#32), -- adds x2, x2, #0x3 - (0x79fa60#64, 0x54000600#32), -- b.eq 79fb20 <.Lctr32_done> // b.none - (0x79fa64#64, 0xf100045f#32), -- cmp x2, #0x1 - (0x79fa68#64, 0xd280020c#32), -- mov x12, #0x10 // #16 - (0x79fa6c#64, 0x9a8c03ec#32), -- csel x12, xzr, x12, eq // eq = none + -- 00000000007d2700 <.Loop3x_ctr32>: + (0x7d2700#64, 0x4e284a00#32), -- aese v0.16b, v16.16b + (0x7d2704#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d2708#64, 0x4e284a01#32), -- aese v1.16b, v16.16b + (0x7d270c#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d2710#64, 0x4e284a12#32), -- aese v18.16b, v16.16b + (0x7d2714#64, 0x4e286a52#32), -- aesmc v18.16b, v18.16b + (0x7d2718#64, 0x4cdf78f0#32), -- ld1 {v16.4s}, [x7], #16 + (0x7d271c#64, 0x710008c6#32), -- subs w6, w6, #0x2 + (0x7d2720#64, 0x4e284a20#32), -- aese v0.16b, v17.16b + (0x7d2724#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d2728#64, 0x4e284a21#32), -- aese v1.16b, v17.16b + (0x7d272c#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d2730#64, 0x4e284a32#32), -- aese v18.16b, v17.16b + (0x7d2734#64, 0x4e286a52#32), -- aesmc v18.16b, v18.16b + (0x7d2738#64, 0x4cdf78f1#32), -- ld1 {v17.4s}, [x7], #16 + (0x7d273c#64, 0x54fffe2c#32), -- b.gt 7d2700 <.Loop3x_ctr32> + (0x7d2740#64, 0x4e284a00#32), -- aese v0.16b, v16.16b + (0x7d2744#64, 0x4e286804#32), -- aesmc v4.16b, v0.16b + (0x7d2748#64, 0x4e284a01#32), -- aese v1.16b, v16.16b + (0x7d274c#64, 0x4e286825#32), -- aesmc v5.16b, v1.16b + (0x7d2750#64, 0x4cdf7002#32), -- ld1 {v2.16b}, [x0], #16 + (0x7d2754#64, 0x11000509#32), -- add w9, w8, #0x1 + (0x7d2758#64, 0x4e284a12#32), -- aese v18.16b, v16.16b + (0x7d275c#64, 0x4e286a52#32), -- aesmc v18.16b, v18.16b + (0x7d2760#64, 0x4cdf7003#32), -- ld1 {v3.16b}, [x0], #16 + (0x7d2764#64, 0x5ac00929#32), -- rev w9, w9 + (0x7d2768#64, 0x4e284a24#32), -- aese v4.16b, v17.16b + (0x7d276c#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b + (0x7d2770#64, 0x4e284a25#32), -- aese v5.16b, v17.16b + (0x7d2774#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b + (0x7d2778#64, 0x4cdf7013#32), -- ld1 {v19.16b}, [x0], #16 + (0x7d277c#64, 0xaa0303e7#32), -- mov x7, x3 + (0x7d2780#64, 0x4e284a32#32), -- aese v18.16b, v17.16b + (0x7d2784#64, 0x4e286a51#32), -- aesmc v17.16b, v18.16b + (0x7d2788#64, 0x4e284a84#32), -- aese v4.16b, v20.16b + (0x7d278c#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b + (0x7d2790#64, 0x4e284a85#32), -- aese v5.16b, v20.16b + (0x7d2794#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b + (0x7d2798#64, 0x6e271c42#32), -- eor v2.16b, v2.16b, v7.16b + (0x7d279c#64, 0x1100090a#32), -- add w10, w8, #0x2 + (0x7d27a0#64, 0x4e284a91#32), -- aese v17.16b, v20.16b + (0x7d27a4#64, 0x4e286a31#32), -- aesmc v17.16b, v17.16b + (0x7d27a8#64, 0x6e271c63#32), -- eor v3.16b, v3.16b, v7.16b + (0x7d27ac#64, 0x11000d08#32), -- add w8, w8, #0x3 + (0x7d27b0#64, 0x4e284aa4#32), -- aese v4.16b, v21.16b + (0x7d27b4#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b + (0x7d27b8#64, 0x4e284aa5#32), -- aese v5.16b, v21.16b + (0x7d27bc#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b + (0x7d27c0#64, 0x6e271e73#32), -- eor v19.16b, v19.16b, v7.16b + (0x7d27c4#64, 0x4e1c1d26#32), -- mov v6.s[3], w9 + (0x7d27c8#64, 0x4e284ab1#32), -- aese v17.16b, v21.16b + (0x7d27cc#64, 0x4e286a31#32), -- aesmc v17.16b, v17.16b + (0x7d27d0#64, 0x4ea61cc0#32), -- mov v0.16b, v6.16b + (0x7d27d4#64, 0x5ac0094a#32), -- rev w10, w10 + (0x7d27d8#64, 0x4e284ac4#32), -- aese v4.16b, v22.16b + (0x7d27dc#64, 0x4e286884#32), -- aesmc v4.16b, v4.16b + (0x7d27e0#64, 0x4e1c1d46#32), -- mov v6.s[3], w10 + (0x7d27e4#64, 0x5ac0090c#32), -- rev w12, w8 + (0x7d27e8#64, 0x4e284ac5#32), -- aese v5.16b, v22.16b + (0x7d27ec#64, 0x4e2868a5#32), -- aesmc v5.16b, v5.16b + (0x7d27f0#64, 0x4ea61cc1#32), -- mov v1.16b, v6.16b + (0x7d27f4#64, 0x4e1c1d86#32), -- mov v6.s[3], w12 + (0x7d27f8#64, 0x4e284ad1#32), -- aese v17.16b, v22.16b + (0x7d27fc#64, 0x4e286a31#32), -- aesmc v17.16b, v17.16b + (0x7d2800#64, 0x4ea61cd2#32), -- mov v18.16b, v6.16b + (0x7d2804#64, 0xf1000c42#32), -- subs x2, x2, #0x3 + (0x7d2808#64, 0x4e284ae4#32), -- aese v4.16b, v23.16b + (0x7d280c#64, 0x4e284ae5#32), -- aese v5.16b, v23.16b + (0x7d2810#64, 0x4e284af1#32), -- aese v17.16b, v23.16b + (0x7d2814#64, 0x6e241c42#32), -- eor v2.16b, v2.16b, v4.16b + (0x7d2818#64, 0x4cdf78f0#32), -- ld1 {v16.4s}, [x7], #16 + (0x7d281c#64, 0x4c9f7022#32), -- st1 {v2.16b}, [x1], #16 + (0x7d2820#64, 0x6e251c63#32), -- eor v3.16b, v3.16b, v5.16b + (0x7d2824#64, 0x2a0503e6#32), -- mov w6, w5 + (0x7d2828#64, 0x4c9f7023#32), -- st1 {v3.16b}, [x1], #16 + (0x7d282c#64, 0x6e311e73#32), -- eor v19.16b, v19.16b, v17.16b + (0x7d2830#64, 0x4cdf78f1#32), -- ld1 {v17.4s}, [x7], #16 + (0x7d2834#64, 0x4c9f7033#32), -- st1 {v19.16b}, [x1], #16 + (0x7d2838#64, 0x54fff642#32), -- b.cs 7d2700 <.Loop3x_ctr32> // b.hs, b.nlast + (0x7d283c#64, 0xb1000c42#32), -- adds x2, x2, #0x3 + (0x7d2840#64, 0x54000600#32), -- b.eq 7d2900 <.Lctr32_done> // b.none - -- 000000000079fa70 <.Lctr32_tail>: - (0x79fa70#64, 0x4e284a00#32), -- aese v0.16b, v16.16b - (0x79fa74#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79fa78#64, 0x4e284a01#32), -- aese v1.16b, v16.16b - (0x79fa7c#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79fa80#64, 0x4cdf78f0#32), -- ld1 {v16.4s}, [x7], #16 - (0x79fa84#64, 0x710008c6#32), -- subs w6, w6, #0x2 - (0x79fa88#64, 0x4e284a20#32), -- aese v0.16b, v17.16b - (0x79fa8c#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79fa90#64, 0x4e284a21#32), -- aese v1.16b, v17.16b - (0x79fa94#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79fa98#64, 0x4cdf78f1#32), -- ld1 {v17.4s}, [x7], #16 - (0x79fa9c#64, 0x54fffeac#32), -- b.gt 79fa70 <.Lctr32_tail> - (0x79faa0#64, 0x4e284a00#32), -- aese v0.16b, v16.16b - (0x79faa4#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79faa8#64, 0x4e284a01#32), -- aese v1.16b, v16.16b - (0x79faac#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79fab0#64, 0x4e284a20#32), -- aese v0.16b, v17.16b - (0x79fab4#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79fab8#64, 0x4e284a21#32), -- aese v1.16b, v17.16b - (0x79fabc#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79fac0#64, 0x4ccc7002#32), -- ld1 {v2.16b}, [x0], x12 - (0x79fac4#64, 0x4e284a80#32), -- aese v0.16b, v20.16b - (0x79fac8#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79facc#64, 0x4e284a81#32), -- aese v1.16b, v20.16b - (0x79fad0#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79fad4#64, 0x4c407003#32), -- ld1 {v3.16b}, [x0] - (0x79fad8#64, 0x4e284aa0#32), -- aese v0.16b, v21.16b - (0x79fadc#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79fae0#64, 0x4e284aa1#32), -- aese v1.16b, v21.16b - (0x79fae4#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79fae8#64, 0x6e271c42#32), -- eor v2.16b, v2.16b, v7.16b - (0x79faec#64, 0x4e284ac0#32), -- aese v0.16b, v22.16b - (0x79faf0#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b - (0x79faf4#64, 0x4e284ac1#32), -- aese v1.16b, v22.16b - (0x79faf8#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b - (0x79fafc#64, 0x6e271c63#32), -- eor v3.16b, v3.16b, v7.16b - (0x79fb00#64, 0x4e284ae0#32), -- aese v0.16b, v23.16b - (0x79fb04#64, 0x4e284ae1#32), -- aese v1.16b, v23.16b - (0x79fb08#64, 0xf100045f#32), -- cmp x2, #0x1 - (0x79fb0c#64, 0x6e201c42#32), -- eor v2.16b, v2.16b, v0.16b - (0x79fb10#64, 0x6e211c63#32), -- eor v3.16b, v3.16b, v1.16b - (0x79fb14#64, 0x4c9f7022#32), -- st1 {v2.16b}, [x1], #16 - (0x79fb18#64, 0x54000040#32), -- b.eq 79fb20 <.Lctr32_done> // b.none - (0x79fb1c#64, 0x4c007023#32), -- st1 {v3.16b}, [x1] + -- 00000000007d2844 <.Lctr32_tail>: + (0x7d2844#64, 0xf100045f#32), -- cmp x2, #0x1 + (0x7d2848#64, 0xd280020c#32), -- mov x12, #0x10 // #16 + (0x7d284c#64, 0x9a8c03ec#32), -- csel x12, xzr, x12, eq // eq = none + (0x7d2850#64, 0x5400058b#32), -- b.lt 7d2900 <.Lctr32_done> // b.tstop + (0x7d2854#64, 0x4e284a00#32), -- aese v0.16b, v16.16b + (0x7d2858#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d285c#64, 0x4e284a01#32), -- aese v1.16b, v16.16b + (0x7d2860#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d2864#64, 0x4cdf78f0#32), -- ld1 {v16.4s}, [x7], #16 + (0x7d2868#64, 0x710008c6#32), -- subs w6, w6, #0x2 + (0x7d286c#64, 0x4e284a20#32), -- aese v0.16b, v17.16b + (0x7d2870#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d2874#64, 0x4e284a21#32), -- aese v1.16b, v17.16b + (0x7d2878#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d287c#64, 0x4cdf78f1#32), -- ld1 {v17.4s}, [x7], #16 + (0x7d2880#64, 0x54fffe2c#32), -- b.gt 7d2844 <.Lctr32_tail> + (0x7d2884#64, 0x4e284a00#32), -- aese v0.16b, v16.16b + (0x7d2888#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d288c#64, 0x4e284a01#32), -- aese v1.16b, v16.16b + (0x7d2890#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d2894#64, 0x4e284a20#32), -- aese v0.16b, v17.16b + (0x7d2898#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d289c#64, 0x4e284a21#32), -- aese v1.16b, v17.16b + (0x7d28a0#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d28a4#64, 0x4ccc7002#32), -- ld1 {v2.16b}, [x0], x12 + (0x7d28a8#64, 0x4e284a80#32), -- aese v0.16b, v20.16b + (0x7d28ac#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d28b0#64, 0x4e284a81#32), -- aese v1.16b, v20.16b + (0x7d28b4#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d28b8#64, 0x4c407003#32), -- ld1 {v3.16b}, [x0] + (0x7d28bc#64, 0x4e284aa0#32), -- aese v0.16b, v21.16b + (0x7d28c0#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d28c4#64, 0x4e284aa1#32), -- aese v1.16b, v21.16b + (0x7d28c8#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d28cc#64, 0x6e271c42#32), -- eor v2.16b, v2.16b, v7.16b + (0x7d28d0#64, 0x4e284ac0#32), -- aese v0.16b, v22.16b + (0x7d28d4#64, 0x4e286800#32), -- aesmc v0.16b, v0.16b + (0x7d28d8#64, 0x4e284ac1#32), -- aese v1.16b, v22.16b + (0x7d28dc#64, 0x4e286821#32), -- aesmc v1.16b, v1.16b + (0x7d28e0#64, 0x6e271c63#32), -- eor v3.16b, v3.16b, v7.16b + (0x7d28e4#64, 0x4e284ae0#32), -- aese v0.16b, v23.16b + (0x7d28e8#64, 0x4e284ae1#32), -- aese v1.16b, v23.16b + (0x7d28ec#64, 0x6e201c42#32), -- eor v2.16b, v2.16b, v0.16b + (0x7d28f0#64, 0x6e211c63#32), -- eor v3.16b, v3.16b, v1.16b + (0x7d28f4#64, 0x4c9f7022#32), -- st1 {v2.16b}, [x1], #16 + (0x7d28f8#64, 0xb400004c#32), -- cbz x12, 7d2900 <.Lctr32_done> + (0x7d28fc#64, 0x4c007023#32), -- st1 {v3.16b}, [x1] - -- 000000000079fb20 <.Lctr32_done>: - (0x79fb20#64, 0xf84107fd#32), -- ldr x29, [sp], #16 - (0x79fb24#64, 0xd65f03c0#32), -- ret + -- 00000000007d2900 <.Lctr32_done>: + (0x7d2900#64, 0xf84107fd#32), -- ldr x29, [sp], #16 + (0x7d2904#64, 0xd65f03c0#32) -- ret ] end AESHWCtr32EncryptBlocksProgram diff --git a/Tests/AES-GCM/AESV8ProgramTests.lean b/Tests/AES-GCM/AESV8ProgramTests.lean index a60f6ba2..59988ff5 100644 --- a/Tests/AES-GCM/AESV8ProgramTests.lean +++ b/Tests/AES-GCM/AESV8ProgramTests.lean @@ -308,7 +308,7 @@ def aes_hw_ctr32_encrypt_blocks_test (n : Nat) | _ => 0#64 let s := { gpr := init_gpr, sfp := (fun (_ : BitVec 5) => 0#128), - pc := 0x79f8a0#64, + pc := 0x7d2680#64, pstate := PState.zero, mem := (fun (_ :BitVec 64) => 0#8), program := AESHWCtr32EncryptBlocksProgram.aes_hw_ctr32_encrypt_blocks_program, @@ -332,8 +332,6 @@ def in_block : List (BitVec 8) := List.replicate 80 0x00#8 def ivec : BitVec 128 := 0x0#128 -- len = 0, 1, 2, 3, 4, 5 --- Note: aes_hw_ctr32_encrypt_blocks will encrypt two blocks when len = 0, why? --- Suspicion is that this function is never called with len = 0 namespace AES128Ctr32 def rounds : BitVec 64 := 10 @@ -353,42 +351,42 @@ def buf_res_128 : List (BitVec 8) := -- len = 0 def final_state0 : ArmState := - aes_hw_ctr32_encrypt_blocks_test 83 0 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 30 0 in_block rounds key_schedule ivec def final_buf0 : BitVec 640 := read_mem_bytes 80 out_address final_state0 example : read_err final_state0 = StateError.None := by native_decide -example: final_buf0 = (BitVec.zero 384) ++ (extractLsb 255 0 (revflat buf_res_128)) := by native_decide +example: final_buf0 = BitVec.zero 640 := by native_decide -- len = 1 def final_state1 : ArmState := - aes_hw_ctr32_encrypt_blocks_test 82 1 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 88 1 in_block rounds key_schedule ivec def final_buf1 : BitVec 640 := read_mem_bytes 80 out_address final_state1 example : read_err final_state1 = StateError.None := by native_decide example: final_buf1 = (BitVec.zero 512) ++ (extractLsb 127 0 (revflat buf_res_128)) := by native_decide -- -- len = 2 def final_state2 : ArmState := - aes_hw_ctr32_encrypt_blocks_test 82 2 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 89 2 in_block rounds key_schedule ivec def final_buf2 : BitVec 640 := read_mem_bytes 80 out_address final_state2 example : read_err final_state2 = StateError.None := by native_decide example: final_buf2 = (BitVec.zero 384) ++ (extractLsb 255 0 (revflat buf_res_128)) := by native_decide -- len = 3 def final_state3 : ArmState := - aes_hw_ctr32_encrypt_blocks_test 129 3 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 128 3 in_block rounds key_schedule ivec def final_buf3 : BitVec 640 := read_mem_bytes 80 out_address final_state3 example : read_err final_state3 = StateError.None := by native_decide example: final_buf3 = (BitVec.zero 256) ++ (extractLsb 383 0 (revflat buf_res_128)) := by native_decide -- len = 4 def final_state4 : ArmState := - aes_hw_ctr32_encrypt_blocks_test 187 4 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 190 4 in_block rounds key_schedule ivec def final_buf4 : BitVec 640 := read_mem_bytes 80 out_address final_state4 example : read_err final_state4 = StateError.None := by native_decide example: final_buf4 = (BitVec.zero 127) ++ (extractLsb 512 0 (revflat buf_res_128)) := by native_decide -- len = 5 def final_state5 : ArmState := - aes_hw_ctr32_encrypt_blocks_test 188 5 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 191 5 in_block rounds key_schedule ivec def final_buf5 : BitVec 640 := read_mem_bytes 80 out_address final_state5 example : read_err final_state5 = StateError.None := by native_decide example: final_buf5 = revflat buf_res_128 := by native_decide @@ -414,7 +412,7 @@ def buf_res_192 : List (BitVec 8) := -- len = 5 def final_state : ArmState := - aes_hw_ctr32_encrypt_blocks_test 216 5 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 223 5 in_block rounds key_schedule ivec def final_buf : BitVec 640 := read_mem_bytes 80 out_address final_state example : read_err final_state = StateError.None := by native_decide example: final_buf = revflat buf_res_192 := by native_decide @@ -440,7 +438,7 @@ def buf_res_256 : List (BitVec 8) := -- len = 5 def final_state : ArmState := - aes_hw_ctr32_encrypt_blocks_test 244 5 in_block rounds key_schedule ivec + aes_hw_ctr32_encrypt_blocks_test 255 5 in_block rounds key_schedule ivec def final_buf : BitVec 640 := read_mem_bytes 80 out_address final_state example : read_err final_state = StateError.None := by native_decide example: final_buf = revflat buf_res_256 := by native_decide