From 6520069c446709896041196a764dec9206f1d833 Mon Sep 17 00:00:00 2001
From: Roger Barton <rbarton@student.ethz.ch>
Date: Fri, 19 Jan 2024 20:26:47 +0100
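Subject: [PATCH] sim: per-hart ROI labels; verify GEMM across bench_iters

.gitignore:
- Ignore logs*/ directories.

target/sim/bench.ipynb:
- Drop the per-cell "vscode" metadata and switch the kernelspec from
  Octave to Python 3 (ipykernel).
- Add a cell that builds bin/occamy_top.vsim for the selected hw_config.
- Read bench_iters from the ELF and apply the golden model that many
  times before comparing against c_actual.
- Decode BETA with bytes_to_doubles instead of bytes_to_uint32s.
- Report the error count as errors/(m*n).
- Move the trace/annotate cells ahead of the debug display cells, limit
  the error-mask display to [16:32,16:32], and add a c_golden display cell.

target/sim/spec.json:
- Suffix each compute ROI label with its hart number.
- Replace the explicit DMA ROI list with "*".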

---
 .gitignore             |   1 +
 target/sim/bench.ipynb | 321 ++++++++++++-----------------------------
 target/sim/spec.json   |   9 +-
 3 files changed, 93 insertions(+), 238 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5de7b8d20..fd154ee18 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ gmon.out
 site/
 work.lib++/
 target/sim/gemm_results.csv
+logs*/
diff --git a/target/sim/bench.ipynb b/target/sim/bench.ipynb
index da74d8cd0..676ab47f0 100644
--- a/target/sim/bench.ipynb
+++ b/target/sim/bench.ipynb
@@ -12,11 +12,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "c018cdf4-4eac-434f-ab48-c86c9ee61541",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "!python -m pip install pandas plotly pyyaml"
@@ -26,11 +22,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "263c1118-51c5-4a82-9226-b01df47c40a7",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd, numpy as np\n",
@@ -103,11 +95,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "aff8e0ac-8b91-4ac1-b9f1-5da08ff143e1",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Compile hardware for Questa (vsim)\n",
@@ -118,11 +106,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "16e62def-f903-455e-994a-6661c8b8895d",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Compile software\n",
@@ -133,11 +117,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "ef1ab542-924c-4c02-a507-6044c4b32169",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Post process traces\n",
@@ -149,11 +129,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "3caceab4-f9cd-474a-9ce2-8688d05317d8",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Read profile data\n",
@@ -165,11 +141,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "d20fe0e3-3f04-4a05-ab11-3d016dc75e79",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Plot some results\n",
@@ -181,11 +153,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "28811c2e-63ee-4143-ad82-4d97420b9b68",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "!make CFG_OVERRIDE={cfg_file} rtl"
@@ -203,11 +171,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "96393e95-7390-4157-9314-af5155f46f22",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Load top-level benchmark config, where all sweep information is stored\n",
@@ -221,11 +185,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "a537b79f-0d5d-48d3-8662-bd6db2ef05ab",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# flatten into a table\n",
@@ -238,11 +198,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "f68f9aea-c801-4c1b-a1ab-2c70fc782aa8",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Evaluate expressions, any property ending in .eval is executed\n",
@@ -260,11 +216,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "cb40f6f7-4cc5-495d-925b-86c41d31df15",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Explode sweep arrays to get all combinations to run\n",
@@ -282,11 +234,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "24765e2c-d71b-4af5-a6b0-d0ef2de1377a",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   },
@@ -294,11 +242,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "bac025c0-3270-4823-8204-418e31e22652",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "def compile_hw(config: str):\n",
@@ -327,11 +271,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "09539ef4-0b02-4253-a095-26611601820a",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Setup output directory \n",
@@ -376,11 +316,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "3213017b-a0ff-4e47-80f4-fa029cda2a4e",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "configs.groupby(by='hw.config').get_group('full')"
@@ -390,11 +326,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "3a08c9b6-82de-4e36-a4bc-90fca987b151",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   },
@@ -402,11 +334,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "813a82e1-5683-497a-8603-545cb55baada",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   },
@@ -414,11 +342,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "46335c5f-0802-4541-8ebe-5f3ef2de12e4",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   },
@@ -434,11 +358,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "002be39d-43cf-4b35-b939-b3e66f7965fb",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "args = {}\n",
@@ -452,13 +372,21 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "62cd158c-0494-444b-bc86-de08c0681e6e",
+   "id": "ad1f5e4e-9490-4bc5-a6e6-2343dcd33d08",
    "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
+    "scrolled": true
    },
    "outputs": [],
+   "source": [
+    "!make CFG_OVERRIDE={args['hw_config']} bin/occamy_top.vsim"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62cd158c-0494-444b-bc86-de08c0681e6e",
+   "metadata": {},
+   "outputs": [],
    "source": [
     "import sys\n",
     "import os\n",
@@ -478,11 +406,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "347b27bc-bba7-41d5-9e8a-bb7e57a44d88",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "!make CFG_OVERRIDE={args['hw_config']} DEBUG=ON sw"
@@ -493,10 +417,7 @@
    "execution_count": null,
    "id": "91c3a858-8e0c-448b-8ecf-fa8583bb1317",
    "metadata": {
-    "scrolled": true,
-    "vscode": {
-     "languageId": "python"
-    }
+    "scrolled": true
    },
    "outputs": [],
    "source": [
@@ -513,11 +434,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "ab065250-6faf-41ff-9a69-1633e6abc948",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Extract input operands from ELF file\n",
@@ -525,8 +442,10 @@
     "    elf = Elf(args['symbols_bin'])\n",
     "else:\n",
     "    elf = Elf(args['snitch_bin'])\n",
+    "    \n",
+    "bench_iters = bytes_to_uint32s(elf.get_symbol_contents('bench_iters'))[0]\n",
     "alpha = 1\n",
-    "beta = bytes_to_uint32s(elf.get_symbol_contents('BETA'))[0]\n",
+    "beta = bytes_to_doubles(elf.get_symbol_contents('BETA'))[0]\n",
     "m = bytes_to_uint32s(elf.get_symbol_contents('M'))[0]\n",
     "n = bytes_to_uint32s(elf.get_symbol_contents('N'))[0]\n",
     "k = bytes_to_uint32s(elf.get_symbol_contents('K'))[0]\n",
@@ -553,14 +472,16 @@
     "    b = b.reshape((k, n))\n",
     "\n",
     "# Verify results\n",
-    "c_golden = golden_model(alpha, a, b, beta, c)\n",
+    "c_golden = c\n",
+    "for i in range(bench_iters):\n",
+    "    c_golden = golden_model(alpha, a, b, beta, c_golden)\n",
     "\n",
     "ERR_THRESHOLD = 0.001\n",
     "absolute_err = np.absolute(c_golden - c_actual)\n",
     "errors = np.count_nonzero(absolute_err > ERR_THRESHOLD)\n",
     "\n",
     "if (errors):\n",
-    "    print(f'Failed with {errors} errors.')\n",
+    "    print(f'Failed with {errors}/{m*n} errors.')\n",
     "    # verification.dump_results_to_csv([c_golden, c_actual, absolute_err],\n",
     "    #                                  Path.cwd() / 'gemm_results.csv')\n",
     "else:\n",
@@ -570,74 +491,72 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d181eb78-2dc7-4f0a-a0d4-6f67e70da99d",
+   "id": "39f5e03e-a92e-491a-b683-11f0f1efe335",
    "metadata": {
+    "editable": true,
     "scrolled": true,
-    "vscode": {
-     "languageId": "python"
-    }
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
    },
    "outputs": [],
    "source": [
-    "((absolute_err > ERR_THRESHOLD)*1)"
+    "!make -j ROI_SPEC=spec.json logs/trace.json"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3a5a2483-3454-4d5a-9a40-0a0e5984f932",
+   "id": "bfb3f426-7610-4901-9af6-d0487d173046",
    "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
+    "scrolled": true
    },
    "outputs": [],
    "source": [
-    "c_actual"
+    "!make -j annotate BINARY=sw/device/apps/blas/gemm/build/gemm.elf"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3d9e8972-1c93-4715-8996-89b04bdb664d",
+   "id": "d181eb78-2dc7-4f0a-a0d4-6f67e70da99d",
    "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
+    "scrolled": true
    },
    "outputs": [],
    "source": [
-    "result"
+    "((absolute_err > ERR_THRESHOLD)*1)[16:32,16:32]"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bfb3f426-7610-4901-9af6-d0487d173046",
-   "metadata": {
-    "scrolled": true,
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "id": "3a5a2483-3454-4d5a-9a40-0a0e5984f932",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "!make -j annotate BINARY=sw/device/apps/blas/gemm/build/gemm.elf"
+    "c_actual"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "39f5e03e-a92e-491a-b683-11f0f1efe335",
-   "metadata": {
-    "scrolled": true,
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "id": "3d9e8972-1c93-4715-8996-89b04bdb664d",
+   "metadata": {},
    "outputs": [],
    "source": [
-    "!make -j ROI_SPEC=spec.json logs/trace.json"
+    "result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "87302cbd-c7c7-43fa-9103-00dc1bbaf687",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "c_golden"
    ]
   },
   {
@@ -653,11 +572,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "c70c0672-e284-4239-bfaa-ff9d33d046c5",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Works for PI == PJ\n",
@@ -672,11 +587,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "23383d54-a8f8-4052-8485-8e4685f78178",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "pi = (p / PJ).astype(int)\n",
@@ -687,11 +598,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "b13d577c-38dc-4822-b8ec-9e3456810ebd",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "pj = (p % PJ).astype(int)\n",
@@ -702,11 +609,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "dd6da9ba-0a27-43f8-89b0-78ab50e247c1",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "pk = (2*PJ - pi - pj -1) % PJ  # Or if k flipped: (PJ -pi + pj) % PJ\n",
@@ -717,11 +620,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "fa8d23d0-bcd7-457c-ad8d-9bfb48fd1a5a",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "srca = pi * PJ + ((2*PJ - pi - pk) % PJ)\n",
@@ -732,11 +631,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "1783c7cb-f71f-4fbc-982e-a9ca6d5b839e",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "srcb = pj + PJ * ((2*PJ - pj - pk) % PJ)\n",
@@ -747,11 +642,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "9b2c7dfb-718a-49ae-a737-bf135b24169b",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "sa = pi * PJ + ((PJ - pi - pj) % PJ)\n",
@@ -762,11 +653,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "777ceb79-d417-4995-a5d3-f0fd2af26573",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "sb = pi + PJ *((PJ - pi + pj) % PJ)\n",
@@ -777,11 +664,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "be14971f-8d22-43d6-b154-9d05af168f40",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "PJ * ((PJ - pi + pj) % PJ)"
@@ -791,11 +674,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "50c615ee-810f-40c7-a1ff-830b380ef1a7",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "dram = ((p+1) % PJ == 0)\n",
@@ -806,11 +685,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "e8a89535-a92a-4f41-893f-a51f29244735",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "sa = sa * (1-dram) + dram * -1\n",
@@ -821,11 +696,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "7895d159-de0e-4d47-9e38-c03bdd42d2d4",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "sb = sb * (1-dram) + dram * -1\n",
@@ -836,11 +707,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "dd23dac9-1184-47b6-93e8-5ce0e453f365",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
     "pipeStep = (PJ - p - 1) % PJ\n",
@@ -851,11 +718,7 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "8c098e3b-84b5-4f70-aa81-bf2c1df02f08",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   },
@@ -863,20 +726,16 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "22443e7d-45b1-41fd-9f7c-88b85f3949fe",
-   "metadata": {
-    "vscode": {
-     "languageId": "python"
-    }
-   },
+   "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Octave",
-   "language": "octave",
-   "name": "octave"
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
@@ -885,7 +744,7 @@
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
-   "name": "octave",
+   "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.9.12"
diff --git a/target/sim/spec.json b/target/sim/spec.json
index b0f5ae619..3098f0cfb 100644
--- a/target/sim/spec.json
+++ b/target/sim/spec.json
@@ -4,18 +4,13 @@
     {
         "thread": "${f'hart_{9*i+j +1}'}",
         "roi": [
-            {"idx": 1, "label": "compute"}
+            {"idx": 1, "label": "${f'compute_hart_{9*i+j +1}'}"}
         ]
     },
 % endfor
     {
         "thread": "${f'dma_{9*(i+1)}'}",
-        "roi": [
-            {"idx": 0, "label": "dma_in"},
-% for dma_i in range(0, 8):
-            {"idx": ${dma_i + 12}, "label": "dma_tile"},
-% endfor
-        ]
+        "roi": "*"
     },
 % endfor
 ]