From fc45cda023a630f7e15c6b98de478361436e7d9a Mon Sep 17 00:00:00 2001 From: Roger Barton Date: Tue, 16 Jan 2024 11:32:54 +0100 Subject: [PATCH] py gemm 2D pipeline index calculations --- target/sim/bench.ipynb | 198 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 189 insertions(+), 9 deletions(-) diff --git a/target/sim/bench.ipynb b/target/sim/bench.ipynb index eb08171a4..008f3a743 100644 --- a/target/sim/bench.ipynb +++ b/target/sim/bench.ipynb @@ -365,7 +365,8 @@ "args['sim_bin'] = \"bin/occamy_top.vsim\"\n", "args['snitch_bin'] = \"sw/host/apps/offload/build/offload-gemm.elf\"\n", "args['symbols_bin'] = \"sw/device/apps/blas/gemm/build/gemm.elf\"\n", - "args['log'] = None" + "args['log'] = None\n", + "args['hw_config'] = 'cfg/1Q4C.hjson' # 'cfg/1Q2C.hjson'" ] }, { @@ -396,7 +397,7 @@ "metadata": {}, "outputs": [], "source": [ - "!make CFG_OVERRIDE=cfg/1Q2C.hjson DEBUG=ON sw" + "!make CFG_OVERRIDE={args['hw_config']} DEBUG=ON sw" ] }, { @@ -429,7 +430,7 @@ " elf = Elf(args['symbols_bin'])\n", "else:\n", " elf = Elf(args['snitch_bin'])\n", - "alpha = bytes_to_uint32s(elf.get_symbol_contents('ALPHA'))[0]\n", + "beta = bytes_to_uint32s(elf.get_symbol_contents('BETA'))[0]\n", "m = bytes_to_uint32s(elf.get_symbol_contents('M'))[0]\n", "n = bytes_to_uint32s(elf.get_symbol_contents('N'))[0]\n", "k = bytes_to_uint32s(elf.get_symbol_contents('K'))[0]\n", @@ -456,7 +457,7 @@ " b = b.reshape((k, n))\n", "\n", "# Verify results\n", - "c_golden = golden_model(a, b, alpha, c)\n", + "c_golden = golden_model(a, b, beta, c)\n", "\n", "ERR_THRESHOLD = 0.001\n", "absolute_err = np.absolute(c_golden - c_actual)\n", @@ -467,17 +468,19 @@ " # verification.dump_results_to_csv([c_golden, c_actual, absolute_err],\n", " # Path.cwd() / 'gemm_results.csv')\n", "else:\n", - " print('SUCCESS. No actual C matches result.')" + " print(f'SUCCESS. 
Actual C matches result for dim {m} x {n}.')" ] }, { "cell_type": "code", "execution_count": null, "id": "d181eb78-2dc7-4f0a-a0d4-6f67e70da99d", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "((absolute_err > ERR_THRESHOLD)*1).reshape((m,n))" + "((absolute_err > ERR_THRESHOLD)*1)" ] }, { @@ -493,15 +496,192 @@ { "cell_type": "code", "execution_count": null, - "id": "21675e83-8f1c-481d-91bb-98cd1a5957eb", + "id": "3d9e8972-1c93-4715-8996-89b04bdb664d", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "result" + ] }, { "cell_type": "code", "execution_count": null, "id": "bfb3f426-7610-4901-9af6-d0487d173046", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "!make -j annotate BINARY=sw/device/apps/blas/gemm/build/gemm.elf" + ] + }, + { + "cell_type": "markdown", + "id": "925ac4a8-de21-4203-b52a-185aaa3c212c", + "metadata": {}, + "source": [ + "# 2D Pipeline Indexing\n", + "Index calculations for determining the source cluster for C2C DMA." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c70c0672-e284-4239-bfaa-ff9d33d046c5", + "metadata": {}, + "outputs": [], + "source": [ + "# Works for PI == PJ\n", + "PI = 3\n", + "PJ = 3\n", + "P = PI * PJ\n", + "p = np.linspace(0, P -1, num=P, dtype=int).reshape((PI, PJ))\n", + "p" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23383d54-a8f8-4052-8485-8e4685f78178", + "metadata": {}, + "outputs": [], + "source": [ + "pi = (p / PJ).astype(int)\n", + "pi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b13d577c-38dc-4822-b8ec-9e3456810ebd", + "metadata": {}, + "outputs": [], + "source": [ + "pj = (p % PJ).astype(int)\n", + "pj" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd6da9ba-0a27-43f8-89b0-78ab50e247c1", + "metadata": {}, + "outputs": [], + "source": [ + "pk = (2*PJ - pi - pj -1) % PJ # Or if k flipped: (PJ -pi + pj) % PJ\n", + "pk" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa8d23d0-bcd7-457c-ad8d-9bfb48fd1a5a", + "metadata": {}, + "outputs": [], + "source": [ + "srca = pi * PJ + ((2*PJ - pi - pk) % PJ)\n", + "srca" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1783c7cb-f71f-4fbc-982e-a9ca6d5b839e", + "metadata": {}, + "outputs": [], + "source": [ + "srcb = pj + PJ * ((2*PJ - pj - pk) % PJ)\n", + "srcb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9b2c7dfb-718a-49ae-a737-bf135b24169b", + "metadata": {}, + "outputs": [], + "source": [ + "sa = pi * PJ + ((PJ - pi - pj) % PJ)\n", + "sa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "777ceb79-d417-4995-a5d3-f0fd2af26573", + "metadata": {}, + "outputs": [], + "source": [ + "sb = pi + PJ *((PJ - pi + pj) % PJ)\n", + "sb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be14971f-8d22-43d6-b154-9d05af168f40", + "metadata": {}, + "outputs": [], + "source": [ + "PJ * ((PJ - pi + pj) % PJ)" + 
] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "50c615ee-810f-40c7-a1ff-830b380ef1a7", + "metadata": {}, + "outputs": [], + "source": [ + "dram = ((p+1) % PJ == 0)\n", + "dram" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8a89535-a92a-4f41-893f-a51f29244735", + "metadata": {}, + "outputs": [], + "source": [ + "sa = sa * (1-dram) + dram * -1\n", + "sa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7895d159-de0e-4d47-9e38-c03bdd42d2d4", + "metadata": {}, + "outputs": [], + "source": [ + "sb = sb * (1-dram) + dram * -1\n", + "sb" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dd23dac9-1184-47b6-93e8-5ce0e453f365", + "metadata": {}, + "outputs": [], + "source": [ + "pipeStep = (PJ - p - 1) % PJ\n", + "pipeStep" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c098e3b-84b5-4f70-aa81-bf2c1df02f08", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22443e7d-45b1-41fd-9f7c-88b85f3949fe", "metadata": {}, "outputs": [], "source": []