Align to newer N-EUREKA configuration
FrancescoConti committed Apr 16, 2024
1 parent 99cc182 commit 5a9c4b2
Showing 4 changed files with 188 additions and 21 deletions.
4 changes: 4 additions & 0 deletions test/NeuralEngineFunctionalModel.py
@@ -61,6 +61,10 @@ def _norm_quant(
 
         tensor = tensor >> global_shift
 
+        if verbose:
+            print("INTERMEDIATE RESULTS (after shift):")
+            print(tensor)
+
         # Saturate into out_type
         tensor = NeuralEngineFunctionalModel._cast(tensor, out_type, saturate=True)
 
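For reference, the shift-then-saturate sequence that the new verbose prints instrument can be reproduced standalone. A minimal sketch, assuming an int32 accumulator and int8 output; the values and the clip-based saturation are illustrative, not lifted from `_cast`:

```python
import numpy as np

# Toy accumulator values and a global right-shift, as used in _norm_quant
tensor = np.array([1000, -1000, 42], dtype=np.int32)
global_shift = 4

tensor = tensor >> global_shift  # arithmetic shift -> [62, -63, 2]
print("INTERMEDIATE RESULTS (after shift):")
print(tensor)

# Saturation into int8, mirroring what _cast(..., saturate=True) is expected to do
info = np.iinfo(np.int8)
tensor = np.clip(tensor, info.min, info.max).astype(np.int8)
```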
30 changes: 10 additions & 20 deletions test/NeurekaMemoryLayout.py
@@ -24,7 +24,7 @@
 class NeurekaMemoryLayout:
     _WEIGHT_BANDWIDTH = 256
     _CIN_SUBTILE_1x1 = 32
-    _CIN_SUBTILE_3x3 = 28
+    _CIN_SUBTILE_3x3 = 32
 
     @staticmethod
     def weightEncode(
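The 28 -> 32 bump follows the newer N-EUREKA input-channel subtile for 3x3 kernels, and it directly changes how many cin subtiles a layer is split into. A quick illustrative check (the cin value is chosen arbitrarily):

```python
import numpy as np

cin = 64
print(int(np.ceil(cin / 28)))  # 3 subtiles under the older (Siracusa) config
print(int(np.ceil(cin / 32)))  # 2 subtiles under the newer N-EUREKA config
```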
@@ -84,26 +84,16 @@ def weightEncode(
                 constant_values=0,
             )
         elif height == 1 and width == 1:
-            # Tile cinSubtile into tiles of size 4
-            # (cout, cinMajor, Bits, Flattened spatial, cinSubtileMajor, cinSubtileTile)
-            weight = weight.reshape(
-                cout, cinMajor, bits, height * width, cinSubtile // 4, 4
-            )  # cout, cinMajor, bits, 1, 8, 4
-            # Pad bits to 8
-            if bits < 8:
-                # (cout, cinMajor, PaddedBits, Flattened spatial, cinSubtileMajor, cinSubtileTile)
-                weight = np.pad(
-                    weight,
-                    ((0, 0), (0, 0), (0, 8 - bits), (0, 0), (0, 0), (0, 0)),
-                    mode="constant",
-                    constant_values=0,
-                )
-            # (cout, cinMajor, Flattened spatial, cinSubtileMajor, PaddedBits, cinSubtileTile)
-            weight = weight.transpose(0, 1, 3, 4, 2, 5)
+            # (cout * cinMajor, Bits * cinSubtile)
+            weight = weight.reshape(-1, bits * cinSubtile)
+            # Pad only the last dimension to weight bandwidth size
             # (-1, Weight Bandwidth)
-            weight = weight.reshape(
-                cout * cinMajor, NeurekaMemoryLayout._WEIGHT_BANDWIDTH
-            )  # cout*cinMajor, 256b
+            weight = np.pad(
+                weight,
+                ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH - weight.shape[-1])),
+                "constant",
+                constant_values=0,
+            )
 
         # Prepare for packing
         # (-1, Weight Bandwidth Bytes, 8)
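To see what the rewritten 1x1 branch produces, here is a standalone sketch of the new packing on toy shapes; the shapes are illustrative and the zeros stand in for real unpacked weight bits:

```python
import numpy as np

cout, cinMajor, bits, cinSubtile = 2, 1, 2, 32
WEIGHT_BANDWIDTH = 256  # NeurekaMemoryLayout._WEIGHT_BANDWIDTH

# After bit-unpacking and transposing, 1x1 weights have shape
# (cout, cinMajor, Bits, 1, cinSubtile)
weight = np.zeros((cout, cinMajor, bits, 1, cinSubtile), dtype=np.uint8)

# New path: flatten Bits and cinSubtile into one row per (cout, cinMajor) ...
weight = weight.reshape(-1, bits * cinSubtile)
print(weight.shape)  # (2, 64)

# ... then pad only the last dimension up to the weight bandwidth
weight = np.pad(
    weight,
    ((0, 0), (0, WEIGHT_BANDWIDTH - weight.shape[-1])),
    "constant",
    constant_values=0,
)
print(weight.shape)  # (2, 256)
```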
156 changes: 156 additions & 0 deletions test/NeurekaMemoryLayoutSiracusa.py
@@ -0,0 +1,156 @@
# Luka Macan <luka.macan@unibo.it>
# Arpan Suravi Prasad <prasadar@iis.ee.ethz.ch>
#
# Copyright 2023 ETH Zurich and University of Bologna
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0

import numpy as np
import numpy.typing as npt


class NeurekaMemoryLayoutSiracusa:
_WEIGHT_BANDWIDTH = 256
_CIN_SUBTILE_1x1 = 32
_CIN_SUBTILE_3x3 = 28

@staticmethod
def weightEncode(
weight: npt.NDArray[np.uint8], bits: int, depthwise: bool = False
) -> npt.NDArray[np.uint8]:
"""Unroll weight into expected memory format
Expected weight shape is (cout, cin, H, W).
The produced memory layout depends on the weight kernel shape:
- 3x3: (cout, cinMajor, Bits, H x W x cinMinor_3x3 packed into Weight Bandwidth bits),
- 1x1: (cout, cinMajor, Bits x H x W x cinMinor_1x1 packed into Weight Bandwidth bits),
where cinMajor is ceil(cin / cin subtile <mode>) and cinMinor is the cin remainder, zero-padded up to cin subtile <mode>.
"""
if depthwise:
weight = weight.transpose(1, 0, 2, 3) # Swap cout and cin

cout, cin, height, width = weight.shape
cinSubtile = (
NeurekaMemoryLayoutSiracusa._CIN_SUBTILE_3x3
if height == 3
else NeurekaMemoryLayoutSiracusa._CIN_SUBTILE_1x1
)

# Pad cin to be divisible by CIN_SUBTILE
if cin % cinSubtile != 0:
cinPad = cinSubtile - cin % cinSubtile
weight = np.pad(
weight,
((0, 0), (0, cinPad), (0, 0), (0, 0)),
"constant",
constant_values=0,
)

# Reshape into (cout, cinMajor, cinMinor, Flattened spatial, 1)
# The 1 at the end is required by the unpacking
cinMajor = int(np.ceil(cin / cinSubtile))
weight = weight.reshape(cout, cinMajor, cinSubtile, height * width, 1)

# Unpack 'bits' bits in little-endian order, e.g. bits=4: 3 => [1, 1, 0, 0]
# (cout, cinMajor, cinSubtile, Flattened spatial, Bits)
weight = np.unpackbits(weight, axis=-1, count=bits, bitorder="little")

# Shuffle bits so that the final shape is:
# (cout, cinMajor, Bits, Flattened spatial, cinSubtile)
weight = weight.transpose(0, 1, 4, 3, 2)

# Pack dimensions to fit into weight bandwidth
if height == 3 and width == 3:
# (cout * cinMajor * Bits, H * W * cinSubtile)
weight = weight.reshape(-1, height * width * cinSubtile)
# Pad only the last dimension to weight bandwidth size
# (-1, Weight Bandwidth)
weight = np.pad(
weight,
((0, 0), (0, NeurekaMemoryLayoutSiracusa._WEIGHT_BANDWIDTH - weight.shape[-1])),
"constant",
constant_values=0,
)
elif height == 1 and width == 1:
# Tile cinSubtile into tiles of size 4
# (cout, cinMajor, Bits, Flattened spatial, cinSubtileMajor, cinSubtileTile)
weight = weight.reshape(
cout, cinMajor, bits, height * width, cinSubtile // 4, 4
) # cout, cinMajor, bits, 1, 8, 4
# Pad bits to 8
if bits < 8:
# (cout, cinMajor, PaddedBits, Flattened spatial, cinSubtileMajor, cinSubtileTile)
weight = np.pad(
weight,
((0, 0), (0, 0), (0, 8 - bits), (0, 0), (0, 0), (0, 0)),
mode="constant",
constant_values=0,
)
# (cout, cinMajor, Flattened spatial, cinSubtileMajor, PaddedBits, cinSubtileTile)
weight = weight.transpose(0, 1, 3, 4, 2, 5)
# (-1, Weight Bandwidth)
weight = weight.reshape(
cout * cinMajor, NeurekaMemoryLayoutSiracusa._WEIGHT_BANDWIDTH
) # cout*cinMajor, 256b

# Prepare for packing
# (-1, Weight Bandwidth Bytes, 8)
weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayoutSiracusa._WEIGHT_BANDWIDTH / 8))
weight = np.stack(np.split(weight, weightBandwidthBytes, axis=-1), axis=-2)

# Pack bits
# (-1, Weight Bandwidth Bytes)
weight = np.packbits(weight, axis=-1, bitorder="little")

return weight.flatten()

@staticmethod
def weightDecode(
weight: npt.NDArray[np.uint8],
bits: int,
cout: int,
cin: int,
height: int,
width: int,
) -> npt.NDArray[np.uint8]:
"""Reverse of weightEncode"""
cinSubtile = (
NeurekaMemoryLayoutSiracusa._CIN_SUBTILE_3x3
if height == 3
else NeurekaMemoryLayoutSiracusa._CIN_SUBTILE_1x1
)
cinMajor = int(np.ceil(cin / cinSubtile))
cinMinor = cinSubtile
weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayoutSiracusa._WEIGHT_BANDWIDTH / 8))

weight = weight.reshape(-1, weightBandwidthBytes, 1)
weight = np.unpackbits(weight, axis=-1, count=8, bitorder="little")
weight = weight.reshape(-1, NeurekaMemoryLayoutSiracusa._WEIGHT_BANDWIDTH)

if height == 3 and width == 3:
weight = weight[:, : height * width * cinMinor]
weight = weight.reshape(
cout, cinMajor, bits, height * width, cinMinor
).transpose(0, 1, 4, 3, 2)
elif height == 1 and width == 1:
weight = weight[:, : height * width * cinMinor * 8]
weight = weight.reshape(cout, cinMajor, cinMinor // 4, 8, 4).transpose(
0, 1, 2, 4, 3
)
weight = np.packbits(weight, axis=-1, bitorder="little")
weight = weight.reshape(cout, cinMajor * cinMinor, height, width)
weight = weight[:, :cin, :, :]

return weight
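Since `weightDecode` is documented as the reverse of `weightEncode`, a round-trip check is the natural sanity test for this new file. A hedged sketch; the import path, shapes, and random data are assumptions, not part of the commit:

```python
import numpy as np
from NeurekaMemoryLayoutSiracusa import NeurekaMemoryLayoutSiracusa  # assumed path

cout, cin, height, width, bits = 8, 30, 3, 3, 8  # cin deliberately not a multiple of 28
rng = np.random.default_rng(0)
weight = rng.integers(0, 2**bits, size=(cout, cin, height, width)).astype(np.uint8)

encoded = NeurekaMemoryLayoutSiracusa.weightEncode(weight, bits)
decoded = NeurekaMemoryLayoutSiracusa.weightDecode(encoded, bits, cout, cin, height, width)
assert np.array_equal(weight, decoded)
```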
19 changes: 18 additions & 1 deletion test/testgen.py
Expand Up @@ -20,6 +20,8 @@
import json
import os
from typing import Optional, Set, Type, Union
import numpy as np
import sys

import toml

@@ -52,6 +54,7 @@ def headers_gen(
 
 
 def print_tensors(test: NnxTest):
+    np.set_printoptions(threshold=sys.maxsize)
     print("INPUT TENSOR:")
     print(test.input)
     print("WEIGHT TENSOR:")
@@ -83,7 +86,21 @@ def test_gen(
         exit(-1)
 
     test_conf = nnxTestConfCls.model_validate(test_conf_dict)
-    test = NnxTestGenerator.from_conf(test_conf, verbose=args.print_tensors)
+    if test_conf_dict.get("synthetic_weights", False):
+        import torch
+        weight = torch.zeros((test_conf.out_channel, 1 if test_conf.depthwise else test_conf.in_channel, test_conf.kernel_shape.height, test_conf.kernel_shape.width), dtype=torch.int64)
+        for i in range(min(weight.shape[0], weight.shape[1])):
+            weight[i, i, 0, 0] = 1
+    else:
+        weight = None
+    if test_conf_dict.get("synthetic_inputs", False):
+        import torch
+        inputs = torch.zeros((1, test_conf.in_channel, test_conf.in_height, test_conf.in_width), dtype=torch.int64)
+        for i in range(test_conf.in_channel):
+            inputs[:, i, 0, 0] = i
+    else:
+        inputs = None
+    test = NnxTestGenerator.from_conf(test_conf, verbose=args.print_tensors, weight=weight, input=inputs)
     if not args.skip_save:
         test.save(args.test_dir)
     if args.headers:
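The new `synthetic_weights` / `synthetic_inputs` switches make functional-model mismatches easy to trace by eye: the weight becomes an identity-like kernel and each input channel carries its own index. A toy sketch of the generated tensors (the sizes are mine; the real ones come from the test config):

```python
import torch

out_channel, in_channel, height, width = 4, 4, 3, 3

# Identity-like weights: a single 1 at [i, i, 0, 0] per channel pair
weight = torch.zeros((out_channel, in_channel, height, width), dtype=torch.int64)
for i in range(min(out_channel, in_channel)):
    weight[i, i, 0, 0] = 1

# Channel-indexed inputs: pixel (0, 0) of channel i holds the value i
inputs = torch.zeros((1, in_channel, 8, 8), dtype=torch.int64)
for i in range(in_channel):
    inputs[:, i, 0, 0] = i

# A convolution of these sees exactly one nonzero tap per output channel,
# so a wrong output points directly at a channel or offset bug.
```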
