From 303d319f8b99e0328239547b22975339970d0756 Mon Sep 17 00:00:00 2001
From: Roger Barton <rbarton@student.ethz.ch>
Date: Thu, 15 Feb 2024 19:20:08 +0100
Subject: [PATCH] py bench, copy params, load perf data into df, plots

---
 target/sim/bench.ipynb | 217 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 206 insertions(+), 11 deletions(-)

diff --git a/target/sim/bench.ipynb b/target/sim/bench.ipynb
index 676ab47f0..3b41a6a8b 100644
--- a/target/sim/bench.ipynb
+++ b/target/sim/bench.ipynb
@@ -366,7 +366,20 @@
     "args['snitch_bin']  = \"sw/host/apps/offload/build/offload-gemm.elf\"\n",
     "args['symbols_bin'] = \"sw/device/apps/blas/gemm/build/gemm.elf\"\n",
     "args['log']         = None\n",
-    "args['hw_config']   = 'cfg/1Q4C.hjson' # 'cfg/1Q2C.hjson'"
+    "args['hw_config']   = 'cfg/1Q1C.hjson' # 'cfg/1Q2C.hjson'\n",
+    "run_id = 0"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "39071006-ab07-48e4-9845-ca34ec6bd10b",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "!make clean && make rtl"
    ]
   },
   {
@@ -378,7 +391,7 @@
    },
    "outputs": [],
    "source": [
-    "!make CFG_OVERRIDE={args['hw_config']} bin/occamy_top.vsim"
+    "!make -j8 CFG_OVERRIDE={args['hw_config']} DEBUG=OFF bin/occamy_top.vsim"
    ]
   },
   {
@@ -406,10 +419,12 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "347b27bc-bba7-41d5-9e8a-bb7e57a44d88",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "!make CFG_OVERRIDE={args['hw_config']} DEBUG=ON sw"
+    "!make -j8 CFG_OVERRIDE={args['hw_config']} DEBUG=ON sw"
    ]
   },
   {
@@ -423,6 +438,8 @@
    "source": [
     "%%time\n",
     "# Run simulation and get outputs\n",
+    "!mkdir -p logs/\n",
+    "!cp ../../working_dir/snitch_cluster/sw/blas/gemm/data/params.hjson logs/params.hjson\n",
     "raw_results = verification.simulate(sim_bin=args['sim_bin'],\n",
     "                                    snitch_bin=args['snitch_bin'],\n",
     "                                    symbols_bin=args['symbols_bin'],\n",
@@ -481,7 +498,8 @@
     "errors = np.count_nonzero(absolute_err > ERR_THRESHOLD)\n",
     "\n",
     "if (errors):\n",
-    "    print(f'Failed with {errors}/{m*n} errors.')\n",
+    "    print(f'Failed with {errors}/{m*n} errors, for dim {m} x {n}.')\n",
+    "    print(((absolute_err > ERR_THRESHOLD)*1))#[16:32,16:32])\n",
     "    # verification.dump_results_to_csv([c_golden, c_actual, absolute_err],\n",
     "    #                                  Path.cwd() / 'gemm_results.csv')\n",
     "else:\n",
@@ -502,7 +520,8 @@
    },
    "outputs": [],
    "source": [
-    "!make -j ROI_SPEC=spec.json logs/trace.json"
+    "%%time\n",
+    "!make -j8 ROI_SPEC=spec.json BINARY=sw/device/apps/blas/gemm/build/gemm.elf logs/trace.json"
    ]
   },
   {
@@ -514,7 +533,19 @@
    },
    "outputs": [],
    "source": [
-    "!make -j annotate BINARY=sw/device/apps/blas/gemm/build/gemm.elf"
+    "%%time\n",
+    "!make -j8 annotate BINARY=sw/device/apps/blas/gemm/build/gemm.elf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cc717c4e-9ba0-48f5-8afa-b394b594df18",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!mv logs/ logs_{run_id}\n",
+    "run_id += 1"
    ]
   },
   {
@@ -526,14 +557,16 @@
    },
    "outputs": [],
    "source": [
-    "((absolute_err > ERR_THRESHOLD)*1)[16:32,16:32]"
+    "((absolute_err > ERR_THRESHOLD)*1)#[16:32,16:32]"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "3a5a2483-3454-4d5a-9a40-0a0e5984f932",
-   "metadata": {},
+   "id": "076768ca-29cf-4c6c-9465-85603a2b6341",
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "c_actual"
@@ -543,7 +576,9 @@
    "cell_type": "code",
    "execution_count": null,
    "id": "3d9e8972-1c93-4715-8996-89b04bdb664d",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
     "result"
@@ -559,6 +594,166 @@
     "c_golden"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e6b31ec-75f9-43f1-ad39-ab5a29550e06",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "!$PYTHON ../../working_dir/snitch_cluster/util/trace/annotate.py -q -o sw/device/apps/blas/gemm/build/gemm.dumpa sw/device/apps/blas/gemm/build/gemm.elf sw/device/apps/blas/gemm/build/gemm.dump --is-objdump"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4098cbae-7807-47db-a6f9-c28c05770466",
+   "metadata": {},
+   "source": [
+    "# Post-processing Performance Metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6186458e-484c-495a-8b5a-1bd5883667ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import hjson\n",
+    "import pandas as pd  # imported here as well: this cell uses pd before the later import cell runs\n",
+    "from pathlib import Path\n",
+    "dfs = []  # one frame of params.hjson values per archived run directory\n",
+    "for i in range(4):\n",
+    "    param = hjson.loads(Path(f'logs_{i}/params.hjson').read_text())\n",
+    "    df1 = pd.json_normalize(param)\n",
+    "    df1.index = df1.index + i  # offset the index by the run number; perf is merged on it via run_id\n",
+    "    dfs.append(df1)\n",
+    "runs = pd.concat(dfs)\n",
+    "runs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "481d6636-026f-4111-88e6-b86773d1aa03",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import plotly.express as px"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89fc9b64-ee5b-4085-b88f-bf946da738fd",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "start_region = 3\n",
+    "end_region = -2\n",
+    "\n",
+    "dfs = []\n",
+    "for run in range(4):  # not `run_id`: that name is the notebook's run counter used when archiving logs\n",
+    "    for i in range(9):\n",
+    "        hart_type = 'dma' if i % 9 == 8 else 'fpu'\n",
+    "        p = [i % 9, int(i / 9), 0]\n",
+    "        PI = PJ = 2\n",
+    "        pi = int(p[1] / PJ)\n",
+    "        pj = p[1] % PJ\n",
+    "        # .copy() so the columns added below go onto an independent frame rather than a slice\n",
+    "        df1 = pd.read_json(f'logs_{run}/hart_%0.5x_perf.json' % (i+1))[start_region:end_region].copy()\n",
+    "        df1.index = df1.index - start_region\n",
+    "        df1['p0'] = p[0]\n",
+    "        df1['p1'] = p[1]\n",
+    "        df1['pi'] = pi\n",
+    "        df1['pj'] = pj\n",
+    "        df1['hart'] = int(i+1)\n",
+    "        df1['pk'] = int((PI + int(2 * PJ) - pi - pj - 1)) % PJ\n",
+    "        df1['hart_type'] = hart_type\n",
+    "        df1['tileid'] = (df1.index / 3).astype(int)\n",
+    "        df1['ij'] = (df1.index / 3 / 256 * 32).astype(int)\n",
+    "        df1['run_id'] = run\n",
+    "        dfs.append(df1)\n",
+    "perf = pd.concat(dfs).reset_index(names=\"region_id\")\n",
+    "# kernel_occupancy: cycles of a tile's first compute region (fpss_fpu_occupancy > 0.1) over the tile's total cycles\n",
+    "by_tile = [perf['run_id'], perf['hart'], perf['tileid']]\n",
+    "compute_cycles = perf['cycles'].where(perf['fpss_fpu_occupancy'] > 0.1)\n",
+    "perf['kernel_occupancy'] = compute_cycles.groupby(by_tile).transform('first') / perf['cycles'].groupby(by_tile).transform('sum')\n",
+    "perf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "921167a3-4e03-4911-8ad9-bb010ebf3df9",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "perf['region'] = 'none'\n",
+    "perf.loc[perf['fpss_fpu_occupancy'] > 0.1, 'region'] = 'compute'\n",
+    "# TODO (disabled): also label regions with cycles < 1000 as 'sync' (fpss_occupancy == 0) or 'indexing' (fpss_occupancy > 0)\n",
+    "# Drop columns left by an earlier merge first, so re-running this cell does not create _x/_y duplicates.\n",
+    "perf = perf.drop(columns=runs.columns, errors='ignore').merge(runs, left_on='run_id', right_index=True)\n",
+    "perf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "05415f8f-b48f-4e4c-a783-35a0e4575ca0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# px.violin(perf[(perf['hart_type'] == 'fpu') & (perf['region_id'] == 2)], 'gemmInfo.K', 'fpss_fpu_occupancy', violinmode='overlay')\n",
+    "px.bar(perf[(perf['hart_type'] == 'fpu')], 'gemmInfo.K', 'fpss_fpu_occupancy', color='region_id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcdaca7f-50f4-4120-baae-dce2714764a8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "px.violin(perf[(perf['hart_type'] == 'fpu') & (perf['region'] == 'compute')], 'cycles', color='pk', violinmode='overlay')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f23374c3-d7f9-4135-bf5c-d74385f181c1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "px.violin(perf[(perf['hart_type'] == 'fpu') & (perf['region'] == 'compute')], 'kernel_occupancy', color='pk', violinmode='overlay')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4cb21865-ebc7-474d-aeea-d73d337600f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "px.scatter(perf[(perf['hart_type'] == 'fpu') & (perf['region'] == 'compute')], 'cycles', 'tileid', color='ij')#, violinmode='overlay')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3afc48a-1b74-4150-88f4-4d464aa882cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "px.violin(perf[(perf['cycles'] < 1000) & (perf['fpss_occupancy'] == 0)], 'cycles')"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "925ac4a8-de21-4203-b52a-185aaa3c212c",