Skip to content

Commit

Permalink
Test improvements (#11)
Browse files Browse the repository at this point in the history
* Add more data generation methods and add the output_signed define
* Change MemoryLayout to NnxWeight
* Fix regen option of testgen
* Fix global shift type to uint8
* Add NnxMapping
  • Loading branch information
lukamac authored Dec 12, 2024
1 parent 4ffdc0e commit 246b8da
Show file tree
Hide file tree
Showing 10 changed files with 291 additions and 143 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

### Added

- add NnxMapping, a dictionary that maps an accelerator name to its accelerator-specific classes
- choice of data generation method (ones, incremented, or random)
- N-EUREKA accelerator support: 3x3, 1x1, and 3x3 depthwise convolution kernels
- Support for kernels without normalization and quantization for NE16
- isort check
Expand All @@ -15,6 +17,8 @@

### Changed

- conftest now passes only strings to test.py to improve readability of pytest logs
- NnxMemoryLayout is now NnxWeight and also has a method for source generation
- the `wmem` field in the test configurations is now required
- `ne16_task_init` got split into smaller parts: `ne16_task_init`, `ne16_task_set_op_to_conv`, `ne16_task_set_weight_offset`, `ne16_task_set_bits`, `ne16_task_set_norm_quant`
- strides in `ne16_task_set_strides`, `ne16_task_set_dims`, and `ne16_task_set_ptrs` are now strides between consecutive elements in that dimension
Expand All @@ -28,6 +32,7 @@

### Fixed

- global shift should have been of type uint8, not int32
- type conversion compiler warning

## [0.3.0] - 2024-01-14
Expand Down
37 changes: 27 additions & 10 deletions test/Ne16MemoryLayout.py → test/Ne16Weight.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,15 @@
import numpy as np
import numpy.typing as npt

from HeaderWriter import HeaderWriter
from NnxTestClasses import NnxWeight, WmemLiteral

class Ne16MemoryLayout:

class Ne16Weight(NnxWeight):
_CIN_SUBTILE = 16

@staticmethod
def weightEncode(
def encode(
weight: npt.NDArray[np.uint8], bits: int, depthwise: bool = False
) -> npt.NDArray[np.uint8]:
"""Unroll weight into expected memory format
Expand All @@ -39,8 +42,8 @@ def weightEncode(
cout, cin, height, width = weight.shape

# Pad cin to be divisible with CIN_SUBTILE
if cin % Ne16MemoryLayout._CIN_SUBTILE != 0:
cinPad = Ne16MemoryLayout._CIN_SUBTILE - cin % Ne16MemoryLayout._CIN_SUBTILE
if cin % Ne16Weight._CIN_SUBTILE != 0:
cinPad = Ne16Weight._CIN_SUBTILE - cin % Ne16Weight._CIN_SUBTILE
weight = np.pad(
weight,
((0, 0), (0, cinPad), (0, 0), (0, 0)),
Expand All @@ -51,8 +54,8 @@ def weightEncode(

# Reshape into (cout, cinMajor, cinMinor, flattened spatial, 1)
# The 1 at the end is required by the unpacking
cinMajor = cin // Ne16MemoryLayout._CIN_SUBTILE
cinMinor = Ne16MemoryLayout._CIN_SUBTILE
cinMajor = cin // Ne16Weight._CIN_SUBTILE
cinMinor = Ne16Weight._CIN_SUBTILE
weight = weight.reshape(cout, cinMajor, cinMinor, height * width, 1)

# Unpack 'bits' bits in little order, e.g. bits=4: 3 => [1, 1, 0, 0]
Expand All @@ -74,17 +77,16 @@ def weightEncode(
return weight.flatten()

@staticmethod
def weightDecode(
def decode(
weight: npt.NDArray[np.uint8],
bits: int,
cout: int,
cin: int,
height: int,
width: int,
) -> npt.NDArray[np.uint8]:
"""Reverse of weight_roll"""
cinMajor = int(np.ceil(cin / Ne16MemoryLayout._CIN_SUBTILE))
cinMinor = Ne16MemoryLayout._CIN_SUBTILE
cinMajor = int(np.ceil(cin / Ne16Weight._CIN_SUBTILE))
cinMinor = Ne16Weight._CIN_SUBTILE
cinMinorBytes = int(np.ceil(cinMinor / 8))

weight = weight.reshape(cout, cinMajor, bits, height * width, cinMinorBytes, 1)
Expand All @@ -96,3 +98,18 @@ def weightDecode(
weight = weight[:, :cin, :, :]

return weight

@staticmethod
def source_generate(
wmem: WmemLiteral, init: npt.NDArray[np.uint8], header_writer: HeaderWriter
) -> None:
assert wmem == "tcdm", f"Invalid wmem source provided: {wmem}"
section = "PI_L1"

header_writer.generate_vector_files(
"weight",
_type="uint8_t",
size=init.size,
init=init,
section=section,
)
46 changes: 34 additions & 12 deletions test/NeurekaMemoryLayout.py → test/NeurekaWeight.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,17 @@
import numpy as np
import numpy.typing as npt

from HeaderWriter import HeaderWriter
from NnxTestClasses import NnxWeight, WmemLiteral

class NeurekaMemoryLayout:

class NeurekaWeight(NnxWeight):
_WEIGHT_BANDWIDTH = 256
_CIN_SUBTILE_1x1 = 32
_CIN_SUBTILE_3x3 = 28

@staticmethod
def weightEncode(
def encode(
weight: npt.NDArray[np.uint8], bits: int, depthwise: bool = False
) -> npt.NDArray[np.uint8]:
"""Unroll weight into expected memory format
Expand All @@ -43,9 +46,9 @@ def weightEncode(

cout, cin, height, width = weight.shape
cinSubtile = (
NeurekaMemoryLayout._CIN_SUBTILE_3x3
NeurekaWeight._CIN_SUBTILE_3x3
if height == 3
else NeurekaMemoryLayout._CIN_SUBTILE_1x1
else NeurekaWeight._CIN_SUBTILE_1x1
)

# Pad cin to be divisible with CIN_SUBTILE
Expand Down Expand Up @@ -79,7 +82,7 @@ def weightEncode(
# (-1, Weight Bandwidth)
weight = np.pad(
weight,
((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH - weight.shape[-1])),
((0, 0), (0, NeurekaWeight._WEIGHT_BANDWIDTH - weight.shape[-1])),
"constant",
constant_values=0,
)
Expand All @@ -102,7 +105,7 @@ def weightEncode(
weight = weight.transpose(0, 1, 3, 4, 2, 5)
# (-1, Weight Bandwidth)
weight = weight.reshape(
cout * cinMajor, NeurekaMemoryLayout._WEIGHT_BANDWIDTH
cout * cinMajor, NeurekaWeight._WEIGHT_BANDWIDTH
) # cout*cinMajor, 256b

# Pack bits
Expand All @@ -116,27 +119,27 @@ def weightEncode(
return weight.flatten()

@staticmethod
def weightDecode(
def decode(
weight: npt.NDArray[np.uint8],
bits: int,
cout: int,
cin: int,
height: int,
width: int,
) -> npt.NDArray[np.uint8]:
"""Reverse of weightEncode"""
"""Reverse of encode"""
cinSubtile = (
NeurekaMemoryLayout._CIN_SUBTILE_3x3
NeurekaWeight._CIN_SUBTILE_3x3
if height == 3
else NeurekaMemoryLayout._CIN_SUBTILE_1x1
else NeurekaWeight._CIN_SUBTILE_1x1
)
cinMajor = int(np.ceil(cin / cinSubtile))
cinMinor = cinSubtile
weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH / 8))
weightBandwidthBytes = int(np.ceil(NeurekaWeight._WEIGHT_BANDWIDTH / 8))

weight = weight.reshape(-1, weightBandwidthBytes, 1)
weight = np.unpackbits(weight, axis=-1, count=8, bitorder="little")
weight = weight.reshape(-1, NeurekaMemoryLayout._WEIGHT_BANDWIDTH)
weight = weight.reshape(-1, NeurekaWeight._WEIGHT_BANDWIDTH)

if height == 3 and width == 3:
weight = weight[:, : height * width * cinMinor]
Expand All @@ -153,3 +156,22 @@ def weightDecode(
weight = weight[:, :cin, :, :]

return weight

@staticmethod
def source_generate(
wmem: WmemLiteral, init: npt.NDArray[np.uint8], header_writer: HeaderWriter
) -> None:
if wmem == "sram":
section = '__attribute__((section(".weightmem_sram")))'
elif wmem == "mram":
section = '__attribute__((section(".weightmem_mram")))'
else:
section = "PI_L1"

header_writer.generate_vector_files(
"weight",
_type="uint8_t",
size=init.size,
init=init,
section=section,
)
27 changes: 27 additions & 0 deletions test/NnxMapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from enum import Enum
from typing import Dict, NamedTuple, Type

from Ne16TestConf import Ne16TestConf
from Ne16Weight import Ne16Weight
from NeurekaTestConf import NeurekaTestConf
from NeurekaWeight import NeurekaWeight
from NnxTestClasses import NnxTestConf, NnxWeight


class NnxName(Enum):
    """Names of the supported NNX accelerators."""

    ne16 = "ne16"
    neureka = "neureka"

    def __str__(self) -> str:
        # Print as the bare accelerator name (e.g. "ne16"), not "NnxName.ne16".
        return self.value


class NnxAcceleratorClasses(NamedTuple):
    """Bundle of the accelerator-specific classes used by the test flow."""

    testConfCls: Type[NnxTestConf]  # test-configuration class for this accelerator
    weightCls: Type[NnxWeight]  # weight encode/decode/source-generation class


# Maps each accelerator name to its accelerator-specific classes.
NnxMapping: Dict[NnxName, NnxAcceleratorClasses] = {
    NnxName.ne16: NnxAcceleratorClasses(Ne16TestConf, Ne16Weight),
    NnxName.neureka: NnxAcceleratorClasses(NeurekaTestConf, NeurekaWeight),
}
Loading

0 comments on commit 246b8da

Please sign in to comment.