Skip to content

Commit

Permalink
Rename divnceil and remainder, and add nnx_ prefix
Browse files Browse the repository at this point in the history
  • Loading branch information
lukamac committed Jan 27, 2024
1 parent 29ee483 commit d9c7723
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 46 deletions.
39 changes: 24 additions & 15 deletions ne16/hal/ne16_task.c
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ void ne16_task_set_strides(ne16_task_t *task, const uint32_t k_in,
const uint32_t w_in_stride,
const uint32_t h_out_stride,
const uint32_t w_out_stride) {
const uint32_t num_k_in = divnceil(k_in, NE16_SUBTILE_INPUT_CHANNEL);
const uint32_t num_k_in =
nnx_calculate_number_of_tiles(k_in, NE16_SUBTILE_INPUT_CHANNEL);

const ne16_stride_t input_stride = {
.d0 = w_in_stride, .d1 = h_in_stride, .d2 = 0};
Expand Down Expand Up @@ -166,26 +167,34 @@ void ne16_task_set_counters(ne16_task_t *task, const uint32_t k_in,
const uint32_t h_out, const uint32_t w_out,
const uint32_t k_out, const uint8_t padding_bottom,
const uint8_t padding_right) {
const uint16_t num_Ko = divnceil(k_out, task->subtile_output_channel);
const uint16_t num_Ki = divnceil(k_in, NE16_SUBTILE_INPUT_CHANNEL);
const uint16_t num_Ho = divnceil(h_out, NE16_SUBTILE_OUTPUT_HEIGHT);
const uint16_t num_Wo = divnceil(w_out, NE16_SUBTILE_OUTPUT_WIDTH);

const uint16_t rem_Ko = remainder(k_out, task->subtile_output_channel);
const uint16_t rem_Ki = remainder(k_in, NE16_SUBTILE_INPUT_CHANNEL);
const uint16_t rem_Ho = remainder(h_out, NE16_SUBTILE_OUTPUT_HEIGHT);
const uint16_t rem_Wo = remainder(w_out, NE16_SUBTILE_OUTPUT_WIDTH);
const uint16_t num_Ko =
nnx_calculate_number_of_tiles(k_out, task->subtile_output_channel);
const uint16_t num_Ki =
nnx_calculate_number_of_tiles(k_in, NE16_SUBTILE_INPUT_CHANNEL);
const uint16_t num_Ho =
nnx_calculate_number_of_tiles(h_out, NE16_SUBTILE_OUTPUT_HEIGHT);
const uint16_t num_Wo =
nnx_calculate_number_of_tiles(w_out, NE16_SUBTILE_OUTPUT_WIDTH);

const uint16_t rem_Ko =
nnx_calculate_last_tile_size(k_out, task->subtile_output_channel);
const uint16_t rem_Ki =
nnx_calculate_last_tile_size(k_in, NE16_SUBTILE_INPUT_CHANNEL);
const uint16_t rem_Ho =
nnx_calculate_last_tile_size(h_out, NE16_SUBTILE_OUTPUT_HEIGHT);
const uint16_t rem_Wo =
nnx_calculate_last_tile_size(w_out, NE16_SUBTILE_OUTPUT_WIDTH);
const uint16_t rem_Hi =
(task->kernel_shape == 1 ? rem_Ho : rem_Ho + 2) - padding_bottom;
const uint16_t rem_Wi =
(task->kernel_shape == 1 ? rem_Wo : rem_Wo + 2) - padding_right;

const ne16_subtile_t subtile = {
.number = {.KoKi = concat_half(num_Ko, num_Ki),
.HoWo = concat_half(num_Ho, num_Wo)},
.remainder = {.KoKi = concat_half(rem_Ko, rem_Ki),
.HoWo = concat_half(rem_Ho, rem_Wo),
.HiWi = concat_half(rem_Hi, rem_Wi)}};
.number = {.KoKi = nnx_concat_half(num_Ko, num_Ki),
.HoWo = nnx_concat_half(num_Ho, num_Wo)},
.remainder = {.KoKi = nnx_concat_half(rem_Ko, rem_Ki),
.HoWo = nnx_concat_half(rem_Ho, rem_Wo),
.HiWi = nnx_concat_half(rem_Hi, rem_Wi)}};
task->data.cfg.subtile = subtile;
}

Expand Down
39 changes: 24 additions & 15 deletions neureka/hal/neureka_task.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in,
const uint32_t w_in_stride,
const uint32_t h_out_stride,
const uint32_t w_out_stride) {
const uint32_t num_k_in = divnceil(k_in, task->subtile_input_channel);
const uint32_t num_k_in =
nnx_calculate_number_of_tiles(k_in, task->subtile_input_channel);

const neureka_stride_t input_stride = {
.d0 = w_in_stride, .d1 = h_in_stride, .d2 = 0};
Expand Down Expand Up @@ -177,26 +178,34 @@ void neureka_task_set_counters(neureka_task_t *task, const uint32_t k_in,
const uint32_t k_out,
const uint8_t padding_bottom,
const uint8_t padding_right) {
const uint16_t num_Ko = divnceil(k_out, task->subtile_output_channel);
const uint16_t num_Ki = divnceil(k_in, task->subtile_input_channel);
const uint16_t num_Ho = divnceil(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT);
const uint16_t num_Wo = divnceil(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH);

const uint16_t rem_Ko = remainder(k_out, task->subtile_output_channel);
const uint16_t rem_Ki = remainder(k_in, task->subtile_input_channel);
const uint16_t rem_Ho = remainder(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT);
const uint16_t rem_Wo = remainder(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH);
const uint16_t num_Ko =
nnx_calculate_number_of_tiles(k_out, task->subtile_output_channel);
const uint16_t num_Ki =
nnx_calculate_number_of_tiles(k_in, task->subtile_input_channel);
const uint16_t num_Ho =
nnx_calculate_number_of_tiles(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT);
const uint16_t num_Wo =
nnx_calculate_number_of_tiles(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH);

const uint16_t rem_Ko =
nnx_calculate_last_tile_size(k_out, task->subtile_output_channel);
const uint16_t rem_Ki =
nnx_calculate_last_tile_size(k_in, task->subtile_input_channel);
const uint16_t rem_Ho =
nnx_calculate_last_tile_size(h_out, NEUREKA_SUBTILE_OUTPUT_HEIGHT);
const uint16_t rem_Wo =
nnx_calculate_last_tile_size(w_out, NEUREKA_SUBTILE_OUTPUT_WIDTH);
const uint16_t rem_Hi =
(task->kernel_shape == 1 ? rem_Ho : rem_Ho + 2) - padding_bottom;
const uint16_t rem_Wi =
(task->kernel_shape == 1 ? rem_Wo : rem_Wo + 2) - padding_right;

const neureka_subtile_t subtile = {
.number = {.KoKi = concat_half(num_Ko, num_Ki),
.HoWo = concat_half(num_Ho, num_Wo)},
.remainder = {.KoKi = concat_half(rem_Ko, rem_Ki),
.HoWo = concat_half(rem_Ho, rem_Wo),
.HiWi = concat_half(rem_Hi, rem_Wi)}};
.number = {.KoKi = nnx_concat_half(num_Ko, num_Ki),
.HoWo = nnx_concat_half(num_Ho, num_Wo)},
.remainder = {.KoKi = nnx_concat_half(rem_Ko, rem_Ki),
.HoWo = nnx_concat_half(rem_Ho, rem_Wo),
.HiWi = nnx_concat_half(rem_Hi, rem_Wi)}};
task->data.cfg.subtile = subtile;
}

Expand Down
4 changes: 2 additions & 2 deletions src/pulp_nnx_ne16.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,8 @@ void ne16_nnx_dispatch_stride2x2(ne16_dev_t *dev, ne16_task_t *task,
const uint8_t w_ker) {
const uint8_t stride = 2;

const uint32_t n_h = divnceil(h_out, stride);
const uint32_t n_w = divnceil(w_out, stride);
const uint32_t n_h = nnx_calculate_number_of_tiles(h_out, stride);
const uint32_t n_w = nnx_calculate_number_of_tiles(w_out, stride);
const uint32_t input_height_offset = h_out % stride == 1 ? stride : 0;
const uint32_t input_width_offset = w_out % stride == 1 ? stride : 0;
const uint32_t output_height_offset = h_out % stride == 1 ? 1 : 0;
Expand Down
12 changes: 7 additions & 5 deletions util/pulp_nnx_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@

#include "pulp_nnx_util.h"

inline int divnceil(const int dividend, const int divisor) {
return ((dividend - 1) / divisor) + 1;
inline int nnx_calculate_number_of_tiles(const int dim_size,
const int tile_size) {
return ((dim_size - 1) / tile_size) + 1;
}

inline int remainder(const int dividend, const int divisor) {
return ((dividend - 1) % divisor) + 1;
inline int nnx_calculate_last_tile_size(const int dim_size,
const int tile_size) {
return ((dim_size - 1) % tile_size) + 1;
}

inline uint32_t concat_half(const uint16_t high, const uint16_t low) {
inline uint32_t nnx_concat_half(const uint16_t high, const uint16_t low) {
return ((uint32_t)high << 16) | low;
}
20 changes: 11 additions & 9 deletions util/pulp_nnx_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,28 @@
#include <stdint.h>

/**
* divnceil
* nnx_calculate_number_of_tiles
*
* Does integer division and ceiling of it.
* Calculates the number of tiles (iterations) needed to go through a dimension.
* It does so by dividing the dimension size by the tile size and taking the
* ceiling of the result.
*/
int divnceil(const int dividend, const int divisor);
int nnx_calculate_number_of_tiles(const int dim_size, const int tile_size);

/**
* remainder
* nnx_calculate_last_tile_size
*
* Calculates the remainder but if the remainder should be 0,
* returns divisor. Used for calculation of the last `remainding`
* iteration of the tile.
* Calculates the size of the last executed tile by calculating the remainder of
* the dim_size and the tile_size. In case the remainder is 0, it returns the
* full tile_size.
*/
int remainder(const int dividend, const int divisor);
int nnx_calculate_last_tile_size(const int dim_size, const int tile_size);

/**
* nnx_concat_half
*
* Concatenate 2 16-bit numbers into a 32-bit number.
*/
uint32_t concat_half(const uint16_t high, const uint16_t low);
uint32_t nnx_concat_half(const uint16_t high, const uint16_t low);

#endif // __NNX_UTIL_H__

0 comments on commit d9c7723

Please sign in to comment.