From 3dffc7056ad2701b23ab22f92cd785178f09270b Mon Sep 17 00:00:00 2001
From: Shilpi Goel <shigoel@gmail.com>
Date: Mon, 30 Sep 2024 18:08:47 -0500
Subject: [PATCH] Reasoning about GCMGmultV8 (#153)

### Description:

We begin reasoning about `GCMGmultV8` by focusing on proving two frame
conditions, i.e., `Helem` address and `Htable` are unmodified at the end
of symbolic simulation and the final state is error-free.

We will tackle functional correctness later.

This PR also demonstrates an issue where `simp_mem` creates a malformed
proof term.

### Testing:

What tests have been run? Did `make all` succeed for your changes? Was
conformance testing successful on an Aarch64 machine?

Yes.

### License:

By submitting this pull request, I confirm that my contribution is
made under the terms of the Apache 2.0 license.
---
 Proofs/AES-GCM/GCMGmultV8Sym.lean | 74 ++++++++++++++++++++++++++++++-
 Proofs/SHA512/SHA512Loop.lean     | 24 ++++++----
 2 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/Proofs/AES-GCM/GCMGmultV8Sym.lean b/Proofs/AES-GCM/GCMGmultV8Sym.lean
index 7114063f..6a5ecc7a 100644
--- a/Proofs/AES-GCM/GCMGmultV8Sym.lean
+++ b/Proofs/AES-GCM/GCMGmultV8Sym.lean
@@ -5,21 +5,93 @@ Author(s): Alex Keizer
 -/
 import Tests.«AES-GCM».GCMGmultV8Program
 import Tactics.Sym
+import Tactics.Aggregate
 import Tactics.StepThms
+import Tactics.CSE
+import Arm.Memory.SeparateAutomation
+import Arm.Syntax
 
 namespace GCMGmultV8Program
+open ArmStateNotation
 
 #genStepEqTheorems gcm_gmult_v8_program
 
+/-
+xxx: GCMGmultV8 Xi HTable
+-/
+
+set_option pp.deepTerms false in
+set_option pp.deepTerms.threshold 50 in
+-- set_option trace.simp_mem.info true in
 theorem gcm_gmult_v8_program_run_27 (s0 sf : ArmState)
     (h_s0_program : s0.program = gcm_gmult_v8_program)
     (h_s0_err : read_err s0 = .None)
     (h_s0_pc : read_pc s0 = gcm_gmult_v8_program.min)
     (h_s0_sp_aligned : CheckSPAlignment s0)
+    (h_Xi : Xi = s0[read_gpr 64 0#5 s0, 16])
+    (h_HTable : HTable = s0[read_gpr 64 1#5 s0, 256])
+    (h_mem_sep : Memory.Region.pairwiseSeparate
+                 [(read_gpr 64 0#5 s0, 16),
+                  (read_gpr 64 1#5 s0, 256)])
     (h_run : sf = run gcm_gmult_v8_program.length s0) :
-    read_err sf = .None := by
+    -- The final state is error-free.
+    read_err sf = .None ∧
+    -- The program is unmodified in `sf`.
+    sf.program = gcm_gmult_v8_program ∧
+    -- The stack pointer is still aligned in `sf`.
+    CheckSPAlignment sf ∧
+    -- The final state returns to the address in register `x30` in `s0`.
+    read_pc sf = r (StateField.GPR 30#5) s0 ∧
+    -- HTable is unmodified.
+    sf[read_gpr 64 1#5 s0, 256] = HTable ∧
+    -- Frame conditions.
+    -- Note that the following also covers that the Xi address in .GPR 0
+    -- is unmodified.
+    REGS_UNCHANGED_EXCEPT [.SFP 0, .SFP 1, .SFP 2, .SFP 3,
+                           .SFP 17, .SFP 18, .SFP 19, .SFP 20,
+                           .SFP 21, .PC]
+                          (sf, s0) ∧
+    -- Memory frame condition.
+    MEM_UNCHANGED_EXCEPT [(r (.GPR 0) s0, 128)] (sf, s0) := by
+  simp_all only [state_simp_rules, -h_run] -- prelude
   simp (config := {ground := true}) only at h_s0_pc
   -- ^^ Still needed, because `gcm_gmult_v8_program.min` is somehow
   --    unable to be reflected
   sym_n 27
+  -- Epilogue
+  simp only [←Memory.mem_eq_iff_read_mem_bytes_eq] at *
+  simp only [memory_rules] at *
+  sym_aggregate
+  -- Split conjunction
+  repeat' apply And.intro
+  · -- Aggregate the memory (non)effects.
+    -- (FIXME) This will be tackled by `sym_aggregate` when `sym_n` and `simp_mem`
+    -- are merged.
+    simp only [*]
+    /-
+    (FIXME @bollu) `simp_mem; rfl` creates a malformed proof here. The tactic produces
+    no goals, but we get the following error message:
+
+    application type mismatch
+    Memory.read_bytes_eq_extractLsBytes_sub_of_mem_subset'
+      (Eq.mp (congrArg (Eq HTable) (Memory.State.read_mem_bytes_eq_mem_read_bytes s0))
+        (Eq.mp (congrArg (fun x => HTable = read_mem_bytes 256 x s0) zeroExtend_eq_of_r_gpr) h_HTable))
+    argument has type
+      HTable = Memory.read_bytes 256 (r (StateField.GPR 1#5) s0) s0.mem
+    but function has type
+      Memory.read_bytes 256 (r (StateField.GPR 1#5) s0) s0.mem = HTable →
+      mem_subset' (r (StateField.GPR 1#5) s0) 256 (r (StateField.GPR 1#5) s0) 256 →
+        Memory.read_bytes 256 (r (StateField.GPR 1#5) s0) s0.mem =
+          HTable.extractLsBytes (BitVec.toNat (r (StateField.GPR 1#5) s0) - BitVec.toNat (r (StateField.GPR 1#5) s0)) 256
+
+    simp_mem; rfl
+    -/
+    rw [Memory.read_bytes_write_bytes_eq_read_bytes_of_mem_separate']
+    simp_mem
+  · simp only [List.mem_cons, List.mem_singleton, not_or, and_imp]
+    sym_aggregate
+  · intro n addr h_separate
+    simp_mem (config := { useOmegaToClose := false })
+    -- Aggregate the memory (non)effects.
+    simp only [*]
   done
diff --git a/Proofs/SHA512/SHA512Loop.lean b/Proofs/SHA512/SHA512Loop.lean
index 75027137..2a9f715d 100644
--- a/Proofs/SHA512/SHA512Loop.lean
+++ b/Proofs/SHA512/SHA512Loop.lean
@@ -45,11 +45,12 @@ def loop_post (PC N SP CtxBase InputBase : BitVec 64)
   ctx_addr si = CtxBase ∧
   stack_ptr si = SP - 16#64 ∧
   si[KtblAddr, (SHA2.k_512.length * 8)] = BitVec.flatten SHA2.k_512 ∧
-  Memory.Region.pairwiseSeparate
-                  [(SP - 16#64,   16),
-                   (CtxBase,     64),
-                   (InputBase,    (N.toNat * 128)),
-                   (KtblAddr,    (SHA2.k_512.length * 8))] ∧
+  -- (TODO @alex @bollu Uncomment, please, for stress-testing)
+--  Memory.Region.pairwiseSeparate 
+--                  [(SP - 16#64,   16),
+--                   (CtxBase,     64),
+--                   (InputBase,    (N.toNat * 128)),
+--                   (KtblAddr,    (SHA2.k_512.length * 8))] ∧
   r (.GPR 3#5) si = KtblAddr ∧
   input_addr si = InputBase + (N * 128#64) ∧
   -- Registers contain the last processed input block.
@@ -75,13 +76,13 @@ set_option debug.skipKernelTC true in
 -- set_option profiler true in
 -- set_option profiler.threshold 1 in
 set_option maxHeartbeats 0 in
-set_option maxRecDepth 8000 in
+-- set_option maxRecDepth 8000 in
 theorem sha512_block_armv8_loop_1block (si sf : ArmState)
   (h_N : N = 1#64)
   (h_si_prelude : SHA512.prelude 0x126500#64 N SP CtxBase InputBase si)
   -- TODO: Ideally, nsteps ought to be 485 to be able to simulate the loop to
   -- completion.
-  (h_steps : nsteps = 200)
+  (h_steps : nsteps = 400)
   (h_run : sf = run nsteps si) :
   -- (FIXME) PC should be 0x126c94#64 i.e., we are poised to execute the first
   -- instruction following the loop. For now, we stop early on to remain in sync.
@@ -94,12 +95,19 @@ theorem sha512_block_armv8_loop_1block (si sf : ArmState)
           h_si_input_base, h_si_ctx, h_si_ktbl, h_si_separate⟩ := h_si_prelude
   simp only [num_blocks, ctx_addr, stack_ptr, input_addr] at *
   simp only [loop_post]
+  simp at h_si_separate
   -- Symbolic Simulation
   /-
   TODO @alex: The following aggregation fails with
   "simp failed, maximum number of steps exceeded"
   -/
-  -- sym_n 200
+  sym_n 100
+  sym_n 100
+  sym_n 100
+  sym_n 100
+  -- sym_aggregate
+
+
   -- Epilogue
   -- cse (config := { processHyps := .allHyps })
   -- simp (config := {ground := true}) only