From d9c7723a7a77485ad9f918b1f22d6ede2020c0c1 Mon Sep 17 00:00:00 2001 From: Luka Macan Date: Sat, 27 Jan 2024 16:28:58 +0100 Subject: [PATCH] Rename divnceil and remainder, and add nnx_ prefix --- ne16/hal/ne16_task.c | 39 +++++++++++++++++++++++--------------- neureka/hal/neureka_task.c | 39 +++++++++++++++++++++++--------------- src/pulp_nnx_ne16.c | 4 ++-- util/pulp_nnx_util.c | 12 +++++++----- util/pulp_nnx_util.h | 20 ++++++++++--------- 5 files changed, 68 insertions(+), 46 deletions(-) diff --git a/ne16/hal/ne16_task.c b/ne16/hal/ne16_task.c index 21518a7..5f856e4 100644 --- a/ne16/hal/ne16_task.c +++ b/ne16/hal/ne16_task.c @@ -133,7 +133,8 @@ void ne16_task_set_strides(ne16_task_t *task, const uint32_t k_in, const uint32_t w_in_stride, const uint32_t h_out_stride, const uint32_t w_out_stride) { - const uint32_t num_k_in = divnceil(k_in, NE16_SUBTILE_INPUT_CHANNEL); + const uint32_t num_k_in = + nnx_calculate_number_of_tiles(k_in, NE16_SUBTILE_INPUT_CHANNEL); const ne16_stride_t input_stride = { .d0 = w_in_stride, .d1 = h_in_stride, .d2 = 0}; @@ -166,26 +167,34 @@ void ne16_task_set_counters(ne16_task_t *task, const uint32_t k_in, const uint32_t h_out, const uint32_t w_out, const uint32_t k_out, const uint8_t padding_bottom, const uint8_t padding_right) { - const uint16_t num_Ko = divnceil(k_out, task->subtile_output_channel); - const uint16_t num_Ki = divnceil(k_in, NE16_SUBTILE_INPUT_CHANNEL); - const uint16_t num_Ho = divnceil(h_out, NE16_SUBTILE_OUTPUT_HEIGHT); - const uint16_t num_Wo = divnceil(w_out, NE16_SUBTILE_OUTPUT_WIDTH); - - const uint16_t rem_Ko = remainder(k_out, task->subtile_output_channel); - const uint16_t rem_Ki = remainder(k_in, NE16_SUBTILE_INPUT_CHANNEL); - const uint16_t rem_Ho = remainder(h_out, NE16_SUBTILE_OUTPUT_HEIGHT); - const uint16_t rem_Wo = remainder(w_out, NE16_SUBTILE_OUTPUT_WIDTH); + const uint16_t num_Ko = + nnx_calculate_number_of_tiles(k_out, task->subtile_output_channel); + const uint16_t num_Ki = + 
nnx_calculate_number_of_tiles(k_in, NE16_SUBTILE_INPUT_CHANNEL); + const uint16_t num_Ho = + nnx_calculate_number_of_tiles(h_out, NE16_SUBTILE_OUTPUT_HEIGHT); + const uint16_t num_Wo = + nnx_calculate_number_of_tiles(w_out, NE16_SUBTILE_OUTPUT_WIDTH); + + const uint16_t rem_Ko = + nnx_calculate_last_tile_size(k_out, task->subtile_output_channel); + const uint16_t rem_Ki = + nnx_calculate_last_tile_size(k_in, NE16_SUBTILE_INPUT_CHANNEL); + const uint16_t rem_Ho = + nnx_calculate_last_tile_size(h_out, NE16_SUBTILE_OUTPUT_HEIGHT); + const uint16_t rem_Wo = + nnx_calculate_last_tile_size(w_out, NE16_SUBTILE_OUTPUT_WIDTH); const uint16_t rem_Hi = (task->kernel_shape == 1 ? rem_Ho : rem_Ho + 2) - padding_bottom; const uint16_t rem_Wi = (task->kernel_shape == 1 ? rem_Wo : rem_Wo + 2) - padding_right; const ne16_subtile_t subtile = { - .number = {.KoKi = concat_half(num_Ko, num_Ki), - .HoWo = concat_half(num_Ho, num_Wo)}, - .remainder = {.KoKi = concat_half(rem_Ko, rem_Ki), - .HoWo = concat_half(rem_Ho, rem_Wo), - .HiWi = concat_half(rem_Hi, rem_Wi)}}; + .number = {.KoKi = nnx_concat_half(num_Ko, num_Ki), + .HoWo = nnx_concat_half(num_Ho, num_Wo)}, + .remainder = {.KoKi = nnx_concat_half(rem_Ko, rem_Ki), + .HoWo = nnx_concat_half(rem_Ho, rem_Wo), + .HiWi = nnx_concat_half(rem_Hi, rem_Wi)}}; task->data.cfg.subtile = subtile; } diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c index 3527ac0..4541f9d 100644 --- a/neureka/hal/neureka_task.c +++ b/neureka/hal/neureka_task.c @@ -148,7 +148,8 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in, const uint32_t w_in_stride, const uint32_t h_out_stride, const uint32_t w_out_stride) { - const uint32_t num_k_in = divnceil(k_in, task->subtile_input_channel); + const uint32_t num_k_in = + nnx_calculate_number_of_tiles(k_in, task->subtile_input_channel); const neureka_stride_t input_stride = { .d0 = w_in_stride, .d1 = h_in_stride, .d2 = 0}; @@ -177,26 +178,34 @@ void 
neureka_task_set_counters(neureka_task_t *task, const uint32_t k_in, const uint32_t k_out, const uint8_t padding_bottom, const uint8_t padding_right) { - const uint16_t num_Ko = divnceil(k_out, task->subtile_output_channel); - const uint16_t num_Ki = divnceil(k_in, task->subtile_input_channel); - const uint16_t num_Ho = divnceil(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT); - const uint16_t num_Wo = divnceil(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH); - - const uint16_t rem_Ko = remainder(k_out, task->subtile_output_channel); - const uint16_t rem_Ki = remainder(k_in, task->subtile_input_channel); - const uint16_t rem_Ho = remainder(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT); - const uint16_t rem_Wo = remainder(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH); + const uint16_t num_Ko = + nnx_calculate_number_of_tiles(k_out, task->subtile_output_channel); + const uint16_t num_Ki = + nnx_calculate_number_of_tiles(k_in, task->subtile_input_channel); + const uint16_t num_Ho = + nnx_calculate_number_of_tiles(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT); + const uint16_t num_Wo = + nnx_calculate_number_of_tiles(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH); + + const uint16_t rem_Ko = + nnx_calculate_last_tile_size(k_out, task->subtile_output_channel); + const uint16_t rem_Ki = + nnx_calculate_last_tile_size(k_in, task->subtile_input_channel); + const uint16_t rem_Ho = + nnx_calculate_last_tile_size(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT); + const uint16_t rem_Wo = + nnx_calculate_last_tile_size(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH); const uint16_t rem_Hi = (task->kernel_shape == 1 ? rem_Ho : rem_Ho + 2) - padding_bottom; const uint16_t rem_Wi = (task->kernel_shape == 1 ? 
rem_Wo : rem_Wo + 2) - padding_right; const neureka_subtile_t subtile = { - .number = {.KoKi = concat_half(num_Ko, num_Ki), - .HoWo = concat_half(num_Ho, num_Wo)}, - .remainder = {.KoKi = concat_half(rem_Ko, rem_Ki), - .HoWo = concat_half(rem_Ho, rem_Wo), - .HiWi = concat_half(rem_Hi, rem_Wi)}}; + .number = {.KoKi = nnx_concat_half(num_Ko, num_Ki), + .HoWo = nnx_concat_half(num_Ho, num_Wo)}, + .remainder = {.KoKi = nnx_concat_half(rem_Ko, rem_Ki), + .HoWo = nnx_concat_half(rem_Ho, rem_Wo), + .HiWi = nnx_concat_half(rem_Hi, rem_Wi)}}; task->data.cfg.subtile = subtile; } diff --git a/src/pulp_nnx_ne16.c b/src/pulp_nnx_ne16.c index 99a2c9c..f9799fc 100644 --- a/src/pulp_nnx_ne16.c +++ b/src/pulp_nnx_ne16.c @@ -91,8 +91,8 @@ void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task, const uint8_t w_ker) { const uint8_t stride = 2; - const uint32_t n_h = divnceil(h_out, stride); - const uint32_t n_w = divnceil(w_out, stride); + const uint32_t n_h = nnx_calculate_number_of_tiles(h_out, stride); + const uint32_t n_w = nnx_calculate_number_of_tiles(w_out, stride); const uint32_t input_height_offset = h_out % stride == 1 ? stride : 0; const uint32_t input_width_offset = w_out % stride == 1 ? stride : 0; const uint32_t output_height_offset = h_out % stride == 1 ? 
1 : 0; diff --git a/util/pulp_nnx_util.c b/util/pulp_nnx_util.c index 34db512..0107fc1 100644 --- a/util/pulp_nnx_util.c +++ b/util/pulp_nnx_util.c @@ -20,14 +20,16 @@ #include "pulp_nnx_util.h" -inline int divnceil(const int dividend, const int divisor) { - return ((dividend - 1) / divisor) + 1; +inline int nnx_calculate_number_of_tiles(const int dim_size, + const int tile_size) { + return ((dim_size - 1) / tile_size) + 1; } -inline int remainder(const int dividend, const int divisor) { - return ((dividend - 1) % divisor) + 1; +inline int nnx_calculate_last_tile_size(const int dim_size, + const int tile_size) { + return ((dim_size - 1) % tile_size) + 1; } -inline uint32_t concat_half(const uint16_t high, const uint16_t low) { +inline uint32_t nnx_concat_half(const uint16_t high, const uint16_t low) { return ((uint32_t)high << 16) | low; } diff --git a/util/pulp_nnx_util.h b/util/pulp_nnx_util.h index 638e5d9..d167f6d 100644 --- a/util/pulp_nnx_util.h +++ b/util/pulp_nnx_util.h @@ -24,26 +24,28 @@ #include /** - * divnceil + * nnx_calculate_number_of_tiles * - * Does integer division and ceiling of it. + * Calculates the number of tiles (iterations) needed to go through a dimension. + * It does so by dividing the dimension size by the tile size and taking the + * ceiling of the result. */ -int divnceil(const int dividend, const int divisor); +int nnx_calculate_number_of_tiles(const int dim_size, const int tile_size); /** - * remainder + * nnx_calculate_last_tile_size * - * Calculates the remainder but if the remainder should be 0, - * returns divisor. Used for calculation of the last `remainding` - * iteration of the tile. + * Calculates the size of the last executed tile by calculating the remainder of + * the dim_size and the tile_size. In case the remainder is 0, it returns the + * full tile_size. 
*/ -int remainder(const int dividend, const int divisor); +int nnx_calculate_last_tile_size(const int dim_size, const int tile_size); /** * concat_half * * Concatenate 2 16-bit numbers into a 32-bit number. */ -uint32_t concat_half(const uint16_t high, const uint16_t low); +uint32_t nnx_concat_half(const uint16_t high, const uint16_t low); #endif // __NNX_UTIL_H__