Skip to content

Commit

Permalink
Add Float GEMM on PULP with Tiling (#26)
Browse files Browse the repository at this point in the history
1. add pulpfpgemm binding, tiler, and tile constraint
2. add tiled SB and DB floatgemm test to CI Siracusa
3. change deeploytest for float input & output
4. change testMVP input default datatype from int64 to float64
  • Loading branch information
runwangdl authored Jan 17, 2025
1 parent bb7e56d commit 7344dab
Show file tree
Hide file tree
Showing 12 changed files with 219 additions and 49 deletions.
9 changes: 9 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ jobs:
RQHardswish
testBacktracking
FloatAdder
testFloatGEMM
num-cores: 8

siracusa-models:
Expand Down Expand Up @@ -251,6 +252,10 @@ jobs:
{
"name": "RQHardswish",
"L1": [750]
},
{
"name": "testFloatGEMM",
"L1": [8000]
}
]
num-cores: 8
Expand Down Expand Up @@ -291,6 +296,10 @@ jobs:
{
"name": "RQHardswish",
"L1": [750]
},
{
"name": "testFloatGEMM",
"L1": [8000]
}
]
num-cores: 8
Expand Down
16 changes: 11 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,15 @@
- Add the RTL library to the snitch_cluster build process in the Makefile, required for GVSOC simulation


## Add Float Support & Float GEMM for Generic
## Add Float Support & Float GEMM for Generic and PULP

### Added
- Float Support for Constbuffer
- Float GEMM on Generic
- Added FP GEMM to CI
- Fixed Float bug on Testslice, CMSIS TestUtil, DivInteger
- Fixed AbstractDataType Float Bugs
- Simple Float GEMM on Generic and PULP
- FP GEMM to CI
- FP GEMM Tiling on PULP

### Fixed
- Float bug on Testslice, CMSIS TestUtil, DivInteger
- AbstractDataType Float Bugs

18 changes: 13 additions & 5 deletions Deeploy/Targets/PULPOpen/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@
from Deeploy.CommonExtensions.CodeTransformationPasses.Closure import ClosureGeneration, MemoryAwareClosureGeneration
from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \
MemoryManagementGeneration
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, int8_t, int32_t, uint8_t
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \
uint8_t
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate
from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import ConcatTemplate, RQSiGELUTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, GELUChecker, HardswishChecker, MatMulChecker, \
MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, TransposeChecker, \
iLayerNormChecker
from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGemmTemplate, RQSiGELUTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, GELUChecker, GEMMChecker, HardswishChecker, \
MatMulChecker, MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, \
TransposeChecker, iLayerNormChecker
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling
Expand Down Expand Up @@ -196,6 +197,13 @@
ForkTransformer) for type1, type2 in zip([int8_t, uint8_t, int8_t, uint8_t], [int8_t, uint8_t, uint8_t, int8_t])
]

# Node bindings for the floating-point GEMM on PULP.
# A single binding: GEMMChecker is given three float32 pointer types in its
# first list and one in its second (presumably the A/B/C inputs and the
# output -- confirm against GEMMChecker). Code is emitted with the Generic
# target's FloatGemmTemplate.referenceTemplate and wrapped by ForkTransformer.
PULPFloatGEMMBindings = [
NodeBinding(
GEMMChecker([PointerClass(float32_t), PointerClass(float32_t),
PointerClass(float32_t)], [PointerClass(float32_t)]), FloatGemmTemplate.referenceTemplate,
ForkTransformer)
]

PULPRQSMatrixVecBindings = [
NodeBinding(
PULPLinearChecker([PointerClass(type1),
Expand Down
29 changes: 16 additions & 13 deletions Deeploy/Targets/PULPOpen/Platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@
from Deeploy.Targets.CortexM.Parsers import CMSISMaxPool2DParser
from Deeploy.Targets.Generic.Bindings import BasicGatherBindings, BasicPad1DBindings, BasicPad2DBindings, \
BasicReshapeBindings, BasicRQIntegerDivBinding
from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, GatherLayer, MatMulLayer, MaxPoolLayer, MulLayer, \
PadLayer, ReduceMeanLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, \
SliceLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, iSoftmaxLayer
from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, FlattenParser, GatherParser, MatMulParser, \
MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, RequantShiftParser, ReshapeParser, RQAddParser, \
RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SliceParser, TransposeParser, UniformRequantShiftParser, \
UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser
from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, GatherLayer, GEMMLayer, MatMulLayer, MaxPoolLayer, \
MulLayer, PadLayer, ReduceMeanLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \
RQSiHardswishLayer, SliceLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, iSoftmaxLayer
from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, FlattenParser, GatherParser, GEMMParser, \
MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, RequantShiftParser, ReshapeParser, \
RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SliceParser, TransposeParser, \
UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser
from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate
from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import IntegerDivRequantMergePass, \
MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, RQSSplitPass, SkipEmptyConcatPass, \
Expand All @@ -53,12 +53,13 @@
PULPDWConv2DParser, PULPGEMMParser, PULPMatrixVecParser, PULPTallGEMMParser
from Deeploy.Targets.PULPOpen.Templates import AllocateTemplate, FreeTemplate
from Deeploy.Targets.PULPOpen.Tiler import PULPAddTilingReadyBindings, PULPConcatTilingReadyBindings, \
PULPFlattenTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \
PULPiRQSGELUTilingReadyBindings, PULPiSoftmaxTilingReadyBindings, PULPMatMulTilingReadyBindings, \
PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, PULPRQAddTilingReadyBindings, \
PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \
PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \
PULPRQSTilingReadyBindings, PULPTransposeTilingReadyBindings, PULPUniformRQSTilingReadyBindings
PULPFlattenTilingReadyBindings, PULPFPGEMMTilingReadyBindings, PULPiHardswishTilingReadyBindings, \
PULPiRMSNormTilingReadyBindings, PULPiRQSGELUTilingReadyBindings, PULPiSoftmaxTilingReadyBindings, \
PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, \
PULPRQAddTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, \
PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, \
PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, PULPTransposeTilingReadyBindings, \
PULPUniformRQSTilingReadyBindings
from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \
PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass

Expand Down Expand Up @@ -87,6 +88,7 @@
Conv2DMapper = NodeMapper(PULPConv2DParser(), PULPRQSConv2DTilingReadyBindings)
DWConv2DMapper = NodeMapper(PULPDWConv2DParser(), PULPRQSDWConv2DTilingReadyBindings)
GEMMMapper = NodeMapper(PULPGEMMParser(), PULPRQSGEMMTilingReadyBindings)
# Float GEMM: pairs the Generic GEMMParser with the PULP float-GEMM tiling-ready bindings.
FloatGEMMMapper = NodeMapper(GEMMParser(), PULPFPGEMMTilingReadyBindings)
MatrixVecMapper = NodeMapper(PULPMatrixVecParser(), PULPRQSMatrixVecTilingReadyBindings)
TallGEMMMapper = NodeMapper(PULPTallGEMMParser(), PULPRQSTallGEMMTilingReadyBindings)
MaxPool2DMapper = NodeMapper(CMSISMaxPool2DParser(), PULPMaxPool2DTilingReadyBindings)
Expand All @@ -104,6 +106,7 @@
PULPMapping = {
'RequantizedConv': PULPRQSConvLayer([Conv2DMapper, DWConv2DMapper, Conv1DMapper, DWConv1DMapper]),
'RequantizedGemm': PULPRQSGEMMLayer([MatrixVecMapper, TallGEMMMapper, GEMMMapper]),
'Gemm': GEMMLayer([FloatGEMMMapper]),
'MaxPool': MaxPoolLayer([MaxPool2DMapper]),
'RequantizediGELU': RQSiGELULayer([RQGELU_int8_Mapper]),
'RQIntegerDiv': RQIntegerDivLayer([RQIntegerDivMapper]),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ def alignToContext(self, ctxt: NetworkContext,
inSignage = "s" if signedI else "u"
outSignage = "s" if signedO else "u"
mul_intimmediate = int(mul_immediate)
add_intimmediate = int(add_immediate)
mul_int_immediate = int(mul_immediate)
add_int_immediate = int(add_immediate)
%>
// UniformRequantShift (Name: ${nodeName}, Op: ${nodeOp})
UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_intimmediate}, ${add_intimmediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1);
UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_int_immediate}, ${add_int_immediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1);
""")
144 changes: 144 additions & 0 deletions Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,147 @@ def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkCo
tm = GEMMTileConstraint.addPolicyConstraint(tilerModel, parseDict, ctxt)

return tm


class FloatGEMMTileConstraint(TileConstraint):
    """Tile constraints for the floating-point GEMM ``data_out = op(A) * op(B) + C``.

    The operator representation is expected to provide the tensor names under
    ``'A'``, ``'B'``, ``'C'`` and ``'data_out'``, the transposition flags
    ``'transA'`` / ``'transB'`` and the sizes ``'N'`` and ``'O'``.

    Only the output dimensions (M and O) are tiled; the reduction dimension N
    is always kept whole (see :meth:`addPolicyConstraint`).
    """

    @staticmethod
    def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
        """Register the I/O tensor dimensions and tie them together.

        Adds the dimensions of A, B, C and the output to ``tilerModel`` and
        constrains them so that the output rows follow op(A), the output
        columns follow op(B), the inner dimensions of op(A) and op(B) agree,
        and the two trailing dimensions of C match those of the output.

        Returns:
            The same ``tilerModel`` with the constraints added.
        """

        # Get to-be-tiled tensor's buffers
        bufferA = ctxt.lookup(name = parseDict['A'])
        bufferB = ctxt.lookup(name = parseDict['B'])
        bufferC = ctxt.lookup(name = parseDict['C'])
        outputBuffer = ctxt.lookup(name = parseDict['data_out'])

        # Add I/O dimensions to the model as variables
        for bufferName in [bufferA.name, bufferB.name, bufferC.name, outputBuffer.name]:
            tilerModel.addTensorDimToModel(ctxt, bufferName)

        # The matrix lives in the two trailing dimensions; anything before is batch.
        dimOffsetA = len(bufferA.shape) - 2
        dimOffsetB = len(bufferB.shape) - 2
        dimOffsetOut = len(outputBuffer.shape) - 2
        # C was previously addressed with hard-coded dims 0/1, which picks the
        # batch dimensions once C has more than two dimensions. Use the same
        # rank-relative offset as the other tensors; clamp at 0 so tensors of
        # rank <= 2 are addressed exactly as before.
        dimOffsetC = max(len(bufferC.shape) - 2, 0)

        # A transposed operand stores its two matrix dimensions swapped.
        AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimOffsetA + parseDict['transA'])
        ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name,
                                                   dimIdx = dimOffsetA + 1 - parseDict['transA'])
        BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimOffsetB + parseDict['transB'])
        BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name,
                                                   dimIdx = dimOffsetB + 1 - parseDict['transB'])
        outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = dimOffsetOut)
        outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = dimOffsetOut + 1)

        # Map output dims to inputs dims
        tilerModel.addConstraint(outputFirstDimVar == AFirstDimVar)
        tilerModel.addConstraint(outputSecondDimVar == BSecondDimVar)

        # Add GEMM Geometrical constraints
        tilerModel.addConstraint(ASecondDimVar == BFirstDimVar)

        # The addend C is tiled exactly like the output.
        addDimVar_1 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = dimOffsetC)
        addDimVar_2 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = dimOffsetC + 1)
        tilerModel.addConstraint(outputFirstDimVar == addDimVar_1)
        tilerModel.addConstraint(outputSecondDimVar == addDimVar_2)

        return tilerModel

    @staticmethod
    def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel:
        """Add the tiling policy: never split N, and keep O tiles aligned.

        The reduction dimension of both operands is pinned to the full
        ``parseDict['N']``. When ``parseDict['O']`` is at least 16, a
        divisible-by-16 tile-size constraint is requested for the O dimension
        of B through ``addTileSizeDivisibleConstraint``.

        Returns:
            The same ``tilerModel`` with the constraints added.
        """

        bufferA = ctxt.lookup(name = parseDict['A'])
        bufferB = ctxt.lookup(name = parseDict['B'])

        dimOffsetA = len(bufferA.shape) - 2
        dimOffsetB = len(bufferB.shape) - 2

        ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name,
                                                   dimIdx = dimOffsetA + 1 - parseDict['transA'])
        BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimOffsetB + parseDict['transB'])
        BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name,
                                                   dimIdx = dimOffsetB + 1 - parseDict['transB'])

        # VIC: We don't want to deal with intermediate results between kernel calls
        tilerModel.addConstraint(ASecondDimVar == parseDict['N'])
        tilerModel.addConstraint(BFirstDimVar == parseDict['N'])

        if parseDict["O"] >= 16:
            tilerModel.addTileSizeDivisibleConstraint(parseDict, 'O', BSecondDimVar, 16, prefix = "16_")

        return tilerModel

    @classmethod
    def serializeTilingSolution(
            cls, tilingSolution: NodeMemoryConstraint, absoluteOutputCubes: List[AbsoluteHyperRectangle],
            targetMemLevel: str, ctxt: NetworkContext,
            operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, TilingSchedule]:
        """Derive the per-tile input cubes and variable replacements from the output tiles.

        For every output cube the matching A, B and C cubes are reconstructed.
        N is never tiled, so every A/B cube spans the full reduction dimension.
        The per-tile values of ``M``, ``O``, ``N`` and ``batch`` are recorded
        as variable replacements.

        Returns:
            A ``(VariableReplacementScheme, TilingSchedule)`` pair.
        """
        outputCubes = [cube.rectangle for cube in absoluteOutputCubes]

        addrNames = ['A', 'B', 'C', 'data_out']
        inputBaseOffsets, outputBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel,
                                                                  operatorRepresentation, addrNames)

        # Honour the same transposition flags the constraint methods use; a
        # missing flag means "not transposed", which is the previous behaviour.
        transA = operatorRepresentation.get('transA', 0)
        transB = operatorRepresentation.get('transB', 0)

        # N is the second dimension of op(A): the last stored dimension of A
        # unless A is transposed, in which case it is the second to last.
        shapeA = ctxt.lookup(operatorRepresentation['A']).shape
        NSize = shapeA[-2] if transA else shapeA[-1]
        NOffset = 0

        inputLoadSchedule = []
        replacements = {"M": [], "O": [], "batch": []}

        # Every output is constructed by a pair of inputs. Reconstruct this pair.
        for cube in outputCubes:

            BSize = 1
            BOffset = 0
            BatchSize = 1
            BatchOffset = 0

            if len(cube.offset) == 2:
                (MOffset, OOffset) = cube.offset
                (MSize, OSize) = cube.dims
            elif len(cube.offset) == 3:
                (BatchOffset, MOffset, OOffset) = cube.offset
                (BatchSize, MSize, OSize) = cube.dims
            else:
                (BatchOffset, BOffset, MOffset, OOffset) = cube.offset
                (BatchSize, BSize, MSize, OSize) = cube.dims

            replacements["M"].append(MSize)
            replacements["O"].append(OSize)
            # NOTE(review): only the 4-D case sets BSize, so for 3-D outputs
            # "batch" is always 1 even when BatchSize > 1 -- confirm against
            # the kernel template before changing.
            replacements["batch"].append(BSize)

            # Transposed operands are stored with their matrix dims swapped,
            # so the cube must be expressed in the stored layout.
            if transA:
                ACube = HyperRectangle((BatchOffset, BOffset, NOffset, MOffset), (BatchSize, BSize, NSize, MSize))
            else:
                ACube = HyperRectangle((BatchOffset, BOffset, MOffset, NOffset), (BatchSize, BSize, MSize, NSize))

            if transB:
                BCube = HyperRectangle((BatchOffset, BOffset, OOffset, NOffset), (BatchSize, BSize, OSize, NSize))
            else:
                BCube = HyperRectangle((BatchOffset, BOffset, NOffset, OOffset), (BatchSize, BSize, NSize, OSize))

            # The addend tile coincides with the output tile.
            CCube = HyperRectangle(cube.offset, cube.dims)

            inputLoadSchedule.append({"A": ACube, "B": BCube, "C": CCube})

        outputLoadSchedule = [{"data_out": out} for out in outputCubes]

        replacements["N"] = [NSize] * len(outputCubes)

        replacementTypes = {
            "M": PointerClass(uint16_t),
            "N": PointerClass(uint16_t),
            "O": PointerClass(uint16_t),
            "batch": PointerClass(uint8_t)
        }

        schedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, outputLoadSchedule)

        return VariableReplacementScheme(replacements, replacementTypes), schedule
17 changes: 10 additions & 7 deletions Deeploy/Targets/PULPOpen/Tiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@
from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint
from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint
from Deeploy.Targets.Generic.TileConstraints.UntiledTileConstraint import UntiledTileConstraint
from Deeploy.Targets.PULPOpen.Bindings import PULPConcatBindings, PULPiHardswishBindings, PULPiRMSNormBindings, \
PULPiRQSGELUBindings, PULPMatMulBinding, PULPMaxPool2DBindings, PULPMulBindings, PULPRQAddBindings, \
PULPRQSBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, PULPRQSiHardswishBindings, \
PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSoftmaxBindings, PULPTransposeBindings, \
PULPUniformRQSBindings, SimpleTransformer
from Deeploy.Targets.PULPOpen.Bindings import PULPConcatBindings, PULPFloatGEMMBindings, PULPiHardswishBindings, \
PULPiRMSNormBindings, PULPiRQSGELUBindings, PULPMatMulBinding, PULPMaxPool2DBindings, PULPMulBindings, \
PULPRQAddBindings, PULPRQSBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, \
PULPRQSiHardswishBindings, PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSoftmaxBindings, \
PULPTransposeBindings, PULPUniformRQSBindings, SimpleTransformer
from Deeploy.Targets.PULPOpen.TileConstraints.ConvTileConstraint import Conv2DTileConstraint
from Deeploy.Targets.PULPOpen.TileConstraints.DWConvTileConstraint import DWConv2DTileConstraint
from Deeploy.Targets.PULPOpen.TileConstraints.GEMMTileConstraint import GEMMTileConstraint, MatrixVecTileConstraint, \
TallGEMMTileConstraint
from Deeploy.Targets.PULPOpen.TileConstraints.GEMMTileConstraint import FloatGEMMTileConstraint, GEMMTileConstraint, \
MatrixVecTileConstraint, TallGEMMTileConstraint
from Deeploy.Targets.PULPOpen.TileConstraints.iSoftmaxTileConstraint import iSoftmaxTileConstraint
from Deeploy.Targets.PULPOpen.TileConstraints.MatMulTileConstraint import MatMulTileConstraint
from Deeploy.Targets.PULPOpen.TileConstraints.MaxPoolTileConstraint import MaxPoolTileConstraint
Expand All @@ -64,6 +64,9 @@
PULPRQSGEMMTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPRQSGEMMBindings,
tileConstraint = GEMMTileConstraint())

# Float GEMM: combines the PULP float-GEMM node bindings with FloatGEMMTileConstraint.
PULPFPGEMMTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPFloatGEMMBindings,
tileConstraint = FloatGEMMTileConstraint())

PULPRQSMatrixVecTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPRQSMatrixVecBindings,
tileConstraint = MatrixVecTileConstraint())

Expand Down
23 changes: 10 additions & 13 deletions DeeployTest/Platforms/Siracusa/src/deeploytest.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ void main(void) {
#ifndef CI
printf("Output:\r\n");
#endif
int32_t diff, tot_err, tot_tested;
float32_t diff, expected_float, actual_float;
uint32_t tot_err, tot_tested;
tot_err = 0;
tot_tested = 0;
char *compbuf;
Expand All @@ -102,22 +103,18 @@ void main(void) {
compbuf = DeeployNetwork_outputs[buf];
}

for (int i = 0; i < DeeployNetwork_outputs_bytes[buf]; i++) {
diff = ((char *)testOutputVector[buf])[i] - ((char *)compbuf)[i];
for (int i = 0; i < DeeployNetwork_outputs_bytes[buf] / sizeof(float32_t); i++) {
tot_tested++;
if (diff) {
expected_float = ((float32_t *)testOutputVector[buf])[i];
actual_float = ((float32_t *)compbuf)[i];
diff = expected_float - actual_float;
if (diff < -1e-5 || diff > 1e-5) {
tot_err += 1;
#ifndef CI
printf("Expected: %i\t\t", ((int8_t *)testOutputVector[buf])[i]);
printf("Actual: %i \t\t", ((int8_t *)compbuf)[i]);
printf("Expected: %f\t\t", expected_float);
printf("Actual: %f \t\t", actual_float);
printf("Diff: %f at Index %u \r\n", diff, i);
#endif
#ifndef CI
printf("Diff: %i at Index %u \r\n", diff, i);
#endif
} else {
/* #ifndef CI */
/* printf("\r\n"); */
/* #endif */
}
}
if (DeeployNetwork_outputs[buf] < 0x1000000) {
Expand Down
Binary file modified DeeployTest/Tests/testFloatGEMM/inputs.npz
Binary file not shown.
Binary file modified DeeployTest/Tests/testFloatGEMM/network.onnx
Binary file not shown.
Binary file modified DeeployTest/Tests/testFloatGEMM/outputs.npz
Binary file not shown.
Loading

0 comments on commit 7344dab

Please sign in to comment.