From 973cd49c56f898027b02eaf13663fdff79855c84 Mon Sep 17 00:00:00 2001 From: Francesco Conti Date: Wed, 21 Aug 2024 11:49:03 +0200 Subject: [PATCH] Tentative fix for QW<8 bit This fixes layout + runtime for QW<8 bit. Tested only on pointwise and only on the special scenario of synthetic weights, for now. --- neureka/hal/neureka_task.c | 2 +- test/NeurekaMemoryLayout.py | 11 ++--------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c index d31c934..d9209f5 100644 --- a/neureka/hal/neureka_task.c +++ b/neureka/hal/neureka_task.c @@ -169,7 +169,7 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in, if (task->kernel_shape == 1) { // 1x1 task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1; task->data.cfg.weights_stride.d1 = - NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 * num_k_in; + (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in; } else if (!task->depthwise) { // 3x3 task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3; task->data.cfg.weights_stride.d1 = diff --git a/test/NeurekaMemoryLayout.py b/test/NeurekaMemoryLayout.py index 61b3ad8..ca51d4a 100644 --- a/test/NeurekaMemoryLayout.py +++ b/test/NeurekaMemoryLayout.py @@ -88,15 +88,8 @@ def weightEncode( elif height == 1 and width == 1: # (cout * cinMajor, Bits * cinSubtile) weight = weight.reshape(-1, bits * cinSubtile) - # Pad only the last dimension to weight bandwidth size - # (-1, Weight Bandwidth) - weight = np.pad( - weight, - ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 - weight.shape[-1])), - "constant", - constant_values=0, - ) - weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 / 8)) + # No padding needed here + weightBandwidthBytes = int(np.ceil(bits * cinSubtile / 8)) # Prepare for packing # (-1, Weight Bandwidth Bytes, 8)