Support weight types int2..int8 in the Neureka test flow and 1x1 weight layout.

- neureka_task.c: scale the 1x1 weights_stride.d1 by task->qw.
- NeuralEngineFunctionalModel.py: print verbose intermediate tensors as
  32-bit two's-complement hex strings via the new _tensor_to_hex helper.
- NeurekaMemoryLayout.py: drop the padding of 1x1 weights to the full weight
  bandwidth; the packed width is now bits * cinSubtile.
- NnxTestClasses.py: move synthetic input/weight generation from testgen.py
  into NnxTestGenerator.from_conf and return a zero global shift when the
  computed shift is below 1.

Review fix relative to the submitted patch: in from_conf the synthetic input
tensor was assigned to a new local `inputs`, so the `input` parameter stayed
None whenever conf.synthetic_inputs was set. It is now assigned to `input`.

diff --git a/neureka/hal/neureka_task.c b/neureka/hal/neureka_task.c
index d31c934..d9209f5 100644
--- a/neureka/hal/neureka_task.c
+++ b/neureka/hal/neureka_task.c
@@ -169,7 +169,7 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in,
   if (task->kernel_shape == 1) { // 1x1
     task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1;
     task->data.cfg.weights_stride.d1 =
-        NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 * num_k_in;
+        (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in;
   } else if (!task->depthwise) { // 3x3
     task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
     task->data.cfg.weights_stride.d1 =
diff --git a/test/NeuralEngineFunctionalModel.py b/test/NeuralEngineFunctionalModel.py
index 32bcc5a..5938554 100644
--- a/test/NeuralEngineFunctionalModel.py
+++ b/test/NeuralEngineFunctionalModel.py
@@ -2,6 +2,7 @@
 
 import torch
 import torch.nn.functional as F
+import numpy as np
 
 from TestClasses import IntegerType, Padding, Stride
 
@@ -9,6 +10,15 @@
 class NeuralEngineFunctionalModel:
     ACCUMULATOR_TYPE = IntegerType(name="int32")
 
+    @staticmethod
+    def _tensor_to_hex(tensor):
+        int_tensor = np.asarray(torch.floor(tensor).to(torch.int64))
+        int_tensor[int_tensor < 0] = 0xffffffff + (int_tensor[int_tensor < 0]+1)
+        hex_tensor = np.empty(int_tensor.shape, dtype=object)
+        for idx in np.ndindex(int_tensor.shape):
+            hex_tensor[idx] = hex(int_tensor[idx].item())
+        return hex_tensor
+
     @staticmethod
     def _cast(
         tensor: torch.Tensor, _type: IntegerType, saturate: bool = False
@@ -36,7 +46,10 @@ def _norm_quant(
 
         if verbose:
             print("INTERMEDIATE RESULTS (after scale):")
-            print(tensor)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(tensor))
+            np.set_printoptions(threshold=current_threshold)
 
         if has_bias:
             assert bias is not None
@@ -54,7 +67,10 @@ def _norm_quant(
 
         if verbose:
             print("INTERMEDIATE RESULTS (after bias):")
-            print(tensor)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(tensor))
+            np.set_printoptions(threshold=current_threshold)
 
         if has_relu:
             tensor = F.relu(tensor)
@@ -63,7 +79,10 @@ def _norm_quant(
 
         if verbose:
             print("INTERMEDIATE RESULTS (after shift):")
-            print(tensor)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(tensor))
+            np.set_printoptions(threshold=current_threshold)
 
         # Saturate into out_type
         tensor = NeuralEngineFunctionalModel._cast(tensor, out_type, saturate=True)
@@ -102,6 +121,15 @@ def convolution(
             0,
         )
 
+        if verbose:
+            print("INPUTS (padded):")
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(input_padded))
+            print("WEIGHTS (padded):")
+            print(NeuralEngineFunctionalModel._tensor_to_hex(weight))
+            np.set_printoptions(threshold=current_threshold)
+
         # Accumulators are 32bit non-saturating.
         # Calculate in higher precision (int64)
         output = F.conv2d(
@@ -118,7 +146,10 @@ def convolution(
 
         if verbose:
             print("INTERMEDIATE RESULTS (pre-normalization/requant):")
-            print(output)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(output))
+            np.set_printoptions(threshold=current_threshold)
 
         if has_norm_quant:
             assert scale is not None
diff --git a/test/NeurekaMemoryLayout.py b/test/NeurekaMemoryLayout.py
index 61b3ad8..ca51d4a 100644
--- a/test/NeurekaMemoryLayout.py
+++ b/test/NeurekaMemoryLayout.py
@@ -88,15 +88,8 @@ def weightEncode(
         elif height == 1 and width == 1:
             # (cout * cinMajor, Bits * cinSubtile)
             weight = weight.reshape(-1, bits * cinSubtile)
-            # Pad only the last dimension to weight bandwidth size
-            # (-1, Weight Bandwidth)
-            weight = np.pad(
-                weight,
-                ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 - weight.shape[-1])),
-                "constant",
-                constant_values=0,
-            )
-            weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 / 8))
+            # No padding needed here
+            weightBandwidthBytes = int(np.ceil(bits * cinSubtile / 8))
 
         # Prepare for packing
         # (-1, Weight Bandwidth Bytes, 8)
diff --git a/test/NeurekaTestConf.py b/test/NeurekaTestConf.py
index f878e68..a8f89a7 100644
--- a/test/NeurekaTestConf.py
+++ b/test/NeurekaTestConf.py
@@ -65,7 +65,7 @@ def check_valid_out_type(cls, v: IntegerType) -> IntegerType:
     @field_validator("weight_type")
     @classmethod
     def check_valid_weight_type(cls, v: IntegerType) -> IntegerType:
-        NeurekaTestConf._check_type("weight_type", v, ["int8"])
+        NeurekaTestConf._check_type("weight_type", v, ["int8", "int7", "int6", "int5", "int4", "int3", "int2"])
         return v
 
     @field_validator("scale_type")
diff --git a/test/NnxTestClasses.py b/test/NnxTestClasses.py
index 41d5131..7e0e3a0 100644
--- a/test/NnxTestClasses.py
+++ b/test/NnxTestClasses.py
@@ -48,6 +48,8 @@ class NnxTestConf(BaseModel):
     has_norm_quant: bool
     has_bias: bool
     has_relu: bool
+    synthetic_weights: bool
+    synthetic_inputs: bool
 
     @model_validator(mode="after")  # type: ignore
     def check_valid_depthwise_channels(self) -> NnxTestConf:
@@ -116,6 +118,8 @@ def __init__(
         scale: Optional[torch.Tensor] = None,
         bias: Optional[torch.Tensor] = None,
         global_shift: Optional[torch.Tensor] = torch.Tensor([0]),
+        synthetic_weights: Optional[bool] = False,
+        synthetic_inputs: Optional[bool] = False,
     ) -> None:
         self.conf = conf
         self.input = input
@@ -124,6 +128,8 @@ def __init__(
         self.scale = scale
         self.bias = bias
         self.global_shift = global_shift
+        self.synthetic_weights = synthetic_weights
+        self.synthetic_inputs = synthetic_inputs
 
     def is_valid(self) -> bool:
         return all(
@@ -207,7 +213,11 @@ def _calculate_global_shift(
         """Calculate global shift so that the output values are in the range of out_type"""
         s = tensor.type(torch.float64).std()
         target_s = 2 ** (out_type._bits - 1)
-        return torch.ceil(torch.log2(s / target_s)).type(torch.int32)
+        shift = torch.ceil(torch.log2(s / target_s)).type(torch.int32)
+        if shift < 1:
+            return torch.zeros((1,)).type(torch.int32)
+        else:
+            return shift
 
     @staticmethod
     def _random_data(_type: IntegerType, shape: Tuple, extremes: Tuple = None):
@@ -243,20 +253,30 @@ def from_conf(
         bias_shape = (1, conf.out_channel, 1, 1)
 
         if input is None:
-            input = NnxTestGenerator._random_data(
-                _type=conf.in_type,
-                shape=input_shape,
-            )
+            if conf.synthetic_inputs:
+                input = torch.zeros((1, conf.in_channel, conf.in_height, conf.in_width), dtype=torch.int64)
+                for i in range(conf.in_channel):
+                    input[:, i, 0, 0] = i
+            else:
+                input = NnxTestGenerator._random_data(
+                    _type=conf.in_type,
+                    shape=input_shape,
+                )
 
         if weight is None:
-            weight_mean = NnxTestGenerator._DEFAULT_WEIGHT_MEAN
-            weight_std = NnxTestGenerator._DEFAULT_WEIGHT_STDEV * (1<<(conf.weight_type._bits-1)-1)
-            weight = NnxTestGenerator._random_data_normal(
-                mean = weight_mean,
-                std = weight_std,
-                _type=conf.weight_type,
-                shape=weight_shape,
-            )
+            if conf.synthetic_weights:
+                weight = torch.zeros((conf.out_channel, 1 if conf.depthwise else conf.in_channel, conf.kernel_shape.height, conf.kernel_shape.width), dtype=torch.int64)
+                for i in range(0, min(weight.shape[0], weight.shape[1])):
+                    weight[i,i,0,0] = 1
+            else:
+                weight_mean = NnxTestGenerator._DEFAULT_WEIGHT_MEAN
+                weight_std = NnxTestGenerator._DEFAULT_WEIGHT_STDEV * (1<<(conf.weight_type._bits-1)-1)
+                weight = NnxTestGenerator._random_data_normal(
+                    mean = weight_mean,
+                    std = weight_std,
+                    _type=conf.weight_type,
+                    shape=weight_shape,
+                )
 
         if conf.has_norm_quant:
             if scale is None:
@@ -306,6 +326,8 @@ def from_conf(
             scale=scale,
             bias=bias,
             global_shift=global_shift,
+            synthetic_inputs=conf.synthetic_inputs,
+            synthetic_weights=conf.synthetic_weights,
         )
 
     @staticmethod
@@ -361,7 +383,10 @@ def generate(self, test_name: str, test: NnxTest):
         weight_type = test.conf.weight_type
         weight_bits = weight_type._bits
         assert weight_bits > 1 and weight_bits <= 8
-        weight_offset = -(2 ** (weight_bits - 1))
+        if test.synthetic_weights:
+            weight_offset = 0
+        else:
+            weight_offset = -(2 ** (weight_bits - 1))
         weight_out_ch, weight_in_ch, weight_ks_h, weight_ks_w = test.weight.shape
         weight_data: np.ndarray = test.weight.numpy() - weight_offset
         weight_init = self.weightEncode(
diff --git a/test/testgen.py b/test/testgen.py
index c128ff4..e5378e4 100644
--- a/test/testgen.py
+++ b/test/testgen.py
@@ -86,21 +86,7 @@ def test_gen(
         exit(-1)
 
     test_conf = nnxTestConfCls.model_validate(test_conf_dict)
-    if test_conf_dict['synthetic_weights']:
-        import torch
-        weight = torch.zeros((test_conf.out_channel, 1 if test_conf.depthwise else test_conf.in_channel, test_conf.kernel_shape.height, test_conf.kernel_shape.width), dtype=torch.int64)
-        for i in range(0, min(weight.shape[0], weight.shape[1])):
-            weight[i,i,0,0] = 1
-    else:
-        weight = None
-    if test_conf_dict['synthetic_inputs']:
-        import torch
-        inputs = torch.zeros((1, test_conf.in_channel, test_conf.in_height, test_conf.in_width), dtype=torch.int64)
-        for i in range(test_conf.in_channel):
-            inputs[:, i,0,0] = i
-    else:
-        inputs = None
-    test = NnxTestGenerator.from_conf(test_conf, verbose=args.print_tensors, weight=weight, input=inputs)
+    test = NnxTestGenerator.from_conf(test_conf, verbose=args.print_tensors)
     if not args.skip_save:
         test.save(args.test_dir)
     if args.headers: