Skip to content

Commit

Permalink
fix neureka bandwidth in 3x3 mode
Browse files (browse the repository at this point in the history)
  • Loading branch information
FrancescoConti committed Apr 18, 2024
1 parent 1e19f46 commit 67113b3
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 9 deletions.
8 changes: 5 additions & 3 deletions neureka/hal/neureka_task.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,16 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in,
.d2 = h_out_stride};
task->data.cfg.output_stride = output_stride;

task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES;
if (task->kernel_shape == 1) { // 1x1
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1;
task->data.cfg.weights_stride.d1 =
NEUREKA_WEIGHT_BANDWIDTH_BYTES * num_k_in;
NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 * num_k_in;
} else if (!task->depthwise) { // 3x3
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
task->data.cfg.weights_stride.d1 =
NEUREKA_WEIGHT_BANDWIDTH_BYTES * task->qw * num_k_in;
NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 * task->qw * num_k_in;
} else { // 3x3 depthwise
task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
task->data.cfg.weights_stride.d1 = 0;
}
task->data.cfg.weights_stride.d2 = 0;
Expand Down
13 changes: 8 additions & 5 deletions neureka/hal/neureka_task_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@

/* ARCHITECTURE */

#define NNX_NEUREKA_PE_H (6)
#define NNX_NEUREKA_PE_W (6)
#define NNX_NEUREKA_PE_H (4)
#define NNX_NEUREKA_PE_W (4)
#define NNX_NEUREKA_BANDWIDTH_1x1 (256)
#define NNX_NEUREKA_BANDWIDTH_3x3 (288)

#define NEUREKA_SUBTILE_INPUT_HEIGHT_1x1 (NNX_NEUREKA_PE_H)
#define NEUREKA_SUBTILE_INPUT_WIDTH_1x1 (NNX_NEUREKA_PE_W)
Expand All @@ -34,12 +36,13 @@
#define NEUREKA_SUBTILE_INPUT_WIDTH_3x3 (NNX_NEUREKA_PE_W+2)
#define NEUREKA_SUBTILE_INPUT_CHANNEL_3x3 (32)

#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (4)
#define NEUREKA_SUBTILE_OUTPUT_WIDTH (4)
#define NEUREKA_SUBTILE_OUTPUT_HEIGHT (NNX_NEUREKA_PE_H)
#define NEUREKA_SUBTILE_OUTPUT_WIDTH (NNX_NEUREKA_PE_W)
#define NEUREKA_SUBTILE_OUTPUT_CHANNEL (32)

#define NEUREKA_OUTPUT_BANDWIDTH_BYTES (32)
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES (32)
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 (NNX_NEUREKA_BANDWIDTH_1x1/8)
#define NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3 (NNX_NEUREKA_BANDWIDTH_3x3/8)

/* TASK REGISTERS */

Expand Down
1 change: 0 additions & 1 deletion test/NeurekaMemoryLayout.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def weightEncode(
weight = weight.reshape(-1, height * width * cinSubtile)
# Pad only the last dimension to weight bandwidth size
# (-1, Weight Bandwidth)
print("DEBUG", weight.shape)
weight = np.pad(
weight,
((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH_3x3 - weight.shape[-1])),
Expand Down

0 comments on commit 67113b3

Please sign in to comment.