diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 420aa9d7..58a05335 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -200,6 +200,7 @@ jobs: RQHardswish testBacktracking FloatAdder + testFloatGEMM num-cores: 8 siracusa-models: @@ -251,6 +252,10 @@ jobs: { "name": "RQHardswish", "L1": [750] + }, + { + "name": "testFloatGEMM", + "L1": [8000] } ] num-cores: 8 @@ -291,6 +296,10 @@ jobs: { "name": "RQHardswish", "L1": [750] + }, + { + "name": "testFloatGEMM", + "L1": [8000] } ] num-cores: 8 diff --git a/CHANGELOG.md b/CHANGELOG.md index 4df97d14..42f41c4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -69,9 +69,15 @@ - Add the RTL library to the snitch_cluster build process in the Makefile, required for GVSOC simulation -## Add Float Support & Float GEMM for Generic +## Add Float Support & Float GEMM for Generic and PULP + +### Added - Float Support for Constbuffer -- Float GEMM on Generic -- Added FP GEMM to CI -- Fixed Float bug on Testslice, CMSIS TestUtil, DivInterger -- Fixed AbstractDatayType Float Bugs +- Simple Float GEMM on Generic and PULP +- FP GEMM to CI +- FP GEMM Tiling on PULP + +### Fixed +- Float bug on Testslice, CMSIS TestUtil, DivInteger +- AbstractDataType Float Bugs + diff --git a/Deeploy/Targets/PULPOpen/Bindings.py b/Deeploy/Targets/PULPOpen/Bindings.py index cb7515bc..ac61768f 100644 --- a/Deeploy/Targets/PULPOpen/Bindings.py +++ b/Deeploy/Targets/PULPOpen/Bindings.py @@ -32,14 +32,15 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.Closure import ClosureGeneration, MemoryAwareClosureGeneration from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ MemoryManagementGeneration -from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, int8_t, int32_t, uint8_t +from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, float32_t, int8_t, int32_t, \ + uint8_t from
Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration -from Deeploy.Targets.Generic.Templates import ConcatTemplate, RQSiGELUTemplate, iHardswishTemplate -from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, GELUChecker, HardswishChecker, MatMulChecker, \ - MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, TransposeChecker, \ - iLayerNormChecker +from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGemmTemplate, RQSiGELUTemplate, iHardswishTemplate +from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, GELUChecker, GEMMChecker, HardswishChecker, \ + MatMulChecker, MulChecker, ReduceMeanChecker, RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, \ + TransposeChecker, iLayerNormChecker from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling @@ -196,6 +197,13 @@ ForkTransformer) for type1, type2 in zip([int8_t, uint8_t, int8_t, uint8_t], [int8_t, uint8_t, uint8_t, int8_t]) ] +PULPFloatGEMMBindings = [ + NodeBinding( + GEMMChecker([PointerClass(float32_t), PointerClass(float32_t), + PointerClass(float32_t)], [PointerClass(float32_t)]), FloatGemmTemplate.referenceTemplate, + ForkTransformer) +] + PULPRQSMatrixVecBindings = [ NodeBinding( PULPLinearChecker([PointerClass(type1), diff --git a/Deeploy/Targets/PULPOpen/Platform.py b/Deeploy/Targets/PULPOpen/Platform.py index bac2d823..4c7cda84 100644 --- a/Deeploy/Targets/PULPOpen/Platform.py +++ b/Deeploy/Targets/PULPOpen/Platform.py @@ -35,13 +35,13 @@ from Deeploy.Targets.CortexM.Parsers import 
CMSISMaxPool2DParser from Deeploy.Targets.Generic.Bindings import BasicGatherBindings, BasicPad1DBindings, BasicPad2DBindings, \ BasicReshapeBindings, BasicRQIntegerDivBinding -from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, GatherLayer, MatMulLayer, MaxPoolLayer, MulLayer, \ - PadLayer, ReduceMeanLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, RQSiHardswishLayer, \ - SliceLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, iSoftmaxLayer -from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, FlattenParser, GatherParser, MatMulParser, \ - MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, RequantShiftParser, ReshapeParser, RQAddParser, \ - RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SliceParser, TransposeParser, UniformRequantShiftParser, \ - UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser +from Deeploy.Targets.Generic.Layers import AddLayer, ConcatLayer, GatherLayer, GEMMLayer, MatMulLayer, MaxPoolLayer, \ + MulLayer, PadLayer, ReduceMeanLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \ + RQSiHardswishLayer, SliceLayer, TransposeLayer, iHardswishLayer, iRMSNormLayer, iSoftmaxLayer +from Deeploy.Targets.Generic.Parsers import AddParser, ConcatParser, FlattenParser, GatherParser, GEMMParser, \ + MatMulParser, MulParser, Pad1DParser, Pad2DParser, ReduceMeanParser, RequantShiftParser, ReshapeParser, \ + RQAddParser, RQIntegerDivParser, RQSiGELUParser, RQSiHardswishParser, SliceParser, TransposeParser, \ + UniformRequantShiftParser, UnsqueezeParser, iHardswishParser, iRMSNormParser, iSoftmaxParser from Deeploy.Targets.Generic.Templates import AllocateTemplate as BasicAllocateTemplate from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import IntegerDivRequantMergePass, \ MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, RQSSplitPass, SkipEmptyConcatPass, \ @@ -53,12 +53,13 @@ PULPDWConv2DParser, PULPGEMMParser, 
PULPMatrixVecParser, PULPTallGEMMParser from Deeploy.Targets.PULPOpen.Templates import AllocateTemplate, FreeTemplate from Deeploy.Targets.PULPOpen.Tiler import PULPAddTilingReadyBindings, PULPConcatTilingReadyBindings, \ - PULPFlattenTilingReadyBindings, PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, \ - PULPiRQSGELUTilingReadyBindings, PULPiSoftmaxTilingReadyBindings, PULPMatMulTilingReadyBindings, \ - PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, PULPRQAddTilingReadyBindings, \ - PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \ - PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \ - PULPRQSTilingReadyBindings, PULPTransposeTilingReadyBindings, PULPUniformRQSTilingReadyBindings + PULPFlattenTilingReadyBindings, PULPFPGEMMTilingReadyBindings, PULPiHardswishTilingReadyBindings, \ + PULPiRMSNormTilingReadyBindings, PULPiRQSGELUTilingReadyBindings, PULPiSoftmaxTilingReadyBindings, \ + PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, \ + PULPRQAddTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, \ + PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, \ + PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, PULPTransposeTilingReadyBindings, \ + PULPUniformRQSTilingReadyBindings from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \ PULPConvRequantMergePass, PULPGEMMRequantMergePass, PULPMatMulRequantMergePass @@ -87,6 +88,7 @@ Conv2DMapper = NodeMapper(PULPConv2DParser(), PULPRQSConv2DTilingReadyBindings) DWConv2DMapper = NodeMapper(PULPDWConv2DParser(), PULPRQSDWConv2DTilingReadyBindings) GEMMMapper = NodeMapper(PULPGEMMParser(), PULPRQSGEMMTilingReadyBindings) +FloatGEMMMapper = NodeMapper(GEMMParser(), 
PULPFPGEMMTilingReadyBindings) MatrixVecMapper = NodeMapper(PULPMatrixVecParser(), PULPRQSMatrixVecTilingReadyBindings) TallGEMMMapper = NodeMapper(PULPTallGEMMParser(), PULPRQSTallGEMMTilingReadyBindings) MaxPool2DMapper = NodeMapper(CMSISMaxPool2DParser(), PULPMaxPool2DTilingReadyBindings) @@ -104,6 +106,7 @@ PULPMapping = { 'RequantizedConv': PULPRQSConvLayer([Conv2DMapper, DWConv2DMapper, Conv1DMapper, DWConv1DMapper]), 'RequantizedGemm': PULPRQSGEMMLayer([MatrixVecMapper, TallGEMMMapper, GEMMMapper]), + 'Gemm': GEMMLayer([FloatGEMMMapper]), 'MaxPool': MaxPoolLayer([MaxPool2DMapper]), 'RequantizediGELU': RQSiGELULayer([RQGELU_int8_Mapper]), 'RQIntegerDiv': RQIntegerDivLayer([RQIntegerDivMapper]), diff --git a/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py b/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py index 9592b0d3..4cfd3d6f 100644 --- a/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py +++ b/Deeploy/Targets/PULPOpen/Templates/UniformRequantShiftTemplate.py @@ -77,10 +77,10 @@ def alignToContext(self, ctxt: NetworkContext, inSignage = "s" if signedI else "u" outSignage = "s" if signedO else "u" -mul_intimmediate = int(mul_immediate) -add_intimmediate = int(add_immediate) +mul_int_immediate = int(mul_immediate) +add_int_immediate = int(add_immediate) %> // UniformRequantShift (Name: ${nodeName}, Op: ${nodeOp}) -UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_intimmediate}, ${add_intimmediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1); +UniformRequantShift_${inSignage}${data_in_type.referencedType.typeWidth}_${outSignage}${data_out_type.referencedType.typeWidth}(${data_in}, ${size}, ${mul_int_immediate}, ${add_int_immediate}, ${data_out}, ${log2Dstring}, ${channel_width}, 0, 0 , ${output_min}, ${output_max}, 1); """) diff --git 
a/Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py b/Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py index 437b6478..17d3c378 100644 --- a/Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py +++ b/Deeploy/Targets/PULPOpen/TileConstraints/GEMMTileConstraint.py @@ -216,3 +216,147 @@ def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkCo tm = GEMMTileConstraint.addPolicyConstraint(tilerModel, parseDict, ctxt) return tm + + +class FloatGEMMTileConstraint(TileConstraint): + + @staticmethod + def addGeometricalConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: + + # Get to-be-tiled tensor's buffers + bufferA = ctxt.lookup(name = parseDict['A']) + bufferB = ctxt.lookup(name = parseDict['B']) + bufferC = ctxt.lookup(name = parseDict['C']) + outputBuffer = ctxt.lookup(name = parseDict['data_out']) + + # Add I/O dimensions to the model as variables + for bufferName in [bufferA.name, bufferB.name, bufferC.name, outputBuffer.name]: + tilerModel.addTensorDimToModel(ctxt, bufferName) + + dimOffsetA = len(bufferA.shape) - 2 + dimOffsetB = len(bufferB.shape) - 2 + dimOffsetOut = len(outputBuffer.shape) - 2 + + AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimOffsetA + parseDict['transA']) + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, + dimIdx = dimOffsetA + 1 - parseDict['transA']) + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimOffsetB + parseDict['transB']) + BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, + dimIdx = dimOffsetB + 1 - parseDict['transB']) + outputFirstDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = dimOffsetOut) + outputSecondDimVar = tilerModel.getTensorDimVar(tensorName = outputBuffer.name, dimIdx = dimOffsetOut + 1) + + # Map output dims to inputs dims + tilerModel.addConstraint(outputFirstDimVar == AFirstDimVar) + 
tilerModel.addConstraint(outputSecondDimVar == BSecondDimVar) + + # Add GEMM Geometrical constraints + tilerModel.addConstraint(ASecondDimVar == BFirstDimVar) + + addDimVar_1 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = 0) + addDimVar_2 = tilerModel.getTensorDimVar(tensorName = bufferC.name, dimIdx = 1) + tilerModel.addConstraint(outputFirstDimVar == addDimVar_1) + tilerModel.addConstraint(outputSecondDimVar == addDimVar_2) + + return tilerModel + + @staticmethod + def addPolicyConstraint(tilerModel: TilerModel, parseDict: Dict, ctxt: NetworkContext) -> TilerModel: + + bufferA = ctxt.lookup(name = parseDict['A']) + bufferB = ctxt.lookup(name = parseDict['B']) + + dimOffsetA = len(bufferA.shape) - 2 + dimOffsetB = len(bufferB.shape) - 2 + + AFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, dimIdx = dimOffsetA + parseDict['transA']) + + ASecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferA.name, + dimIdx = dimOffsetA + 1 - parseDict['transA']) + BFirstDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, dimIdx = dimOffsetB + parseDict['transB']) + BSecondDimVar = tilerModel.getTensorDimVar(tensorName = bufferB.name, + dimIdx = dimOffsetB + 1 - parseDict['transB']) + + # VIC: We don't want to deal with intermediate results between kernel calls + tilerModel.addConstraint(ASecondDimVar == parseDict['N']) + tilerModel.addConstraint(BFirstDimVar == parseDict['N']) + + if (parseDict["O"] >= 16): + # modulus = tilerModel.addMinTileSizeConstraint(parseDict, 'O', BSecondDimVar, 8, prefix="8_") + modulus = tilerModel.addTileSizeDivisibleConstraint(parseDict, 'O', BSecondDimVar, 16, prefix = "16_") + + return tilerModel + + @classmethod + def serializeTilingSolution( + cls, tilingSolution: NodeMemoryConstraint, absoluteOutputCubes: List[AbsoluteHyperRectangle], + targetMemLevel: str, ctxt: NetworkContext, + operatorRepresentation: OperatorRepresentation) -> Tuple[VariableReplacementScheme, TilingSchedule]: + outputCubes 
= [cube.rectangle for cube in absoluteOutputCubes] + + addrNames = ['A', 'B', 'C', 'data_out'] + inputBaseOffsets, outputBaseOffsets = cls.extractBaseAddr(tilingSolution, targetMemLevel, + operatorRepresentation, addrNames) + varA = operatorRepresentation['A'] + + NSize = ctxt.lookup(varA).shape[-1] + NOffset = 0 + + inputACubes = [] + inputBCubes = [] + inputAddCubes = [] + + replacements = {"M": [], "O": [], "batch": []} + + # Every output is constructed by a pair of inputs. Reconstruct this pair. + for cube in outputCubes: + + BSize = 1 + BOffset = 0 + BatchSize = 1 + BatchOffset = 0 + + if len(cube.offset) == 2: + (MOffset, OOffset) = cube.offset + (MSize, OSize) = cube.dims + elif len(cube.offset) == 3: + (BatchOffset, MOffset, OOffset) = cube.offset + (BatchSize, MSize, OSize) = cube.dims + else: + (BatchOffset, BOffset, MOffset, OOffset) = cube.offset + (BatchSize, BSize, MSize, OSize) = cube.dims + + replacements["M"].append(MSize) + replacements["O"].append(OSize) + replacements["batch"].append(BSize) + + ACube = HyperRectangle((BatchOffset, BOffset, MOffset, NOffset), (BatchSize, BSize, MSize, NSize)) + BCube = HyperRectangle((BatchOffset, BOffset, NOffset, OOffset), (BatchSize, BSize, NSize, OSize)) + + CCube = HyperRectangle(cube.offset, cube.dims) + + inputACubes.append(ACube) + inputBCubes.append(BCube) + inputAddCubes.append(CCube) + + inputLoadSchedule = [] + outputLoadSchedule = [] + + replacements["N"] = [NSize] * len(outputCubes) + + replacementTypes = { + "M": PointerClass(uint16_t), + "N": PointerClass(uint16_t), + "O": PointerClass(uint16_t), + "batch": PointerClass(uint8_t) + } + + for a, b, c in zip(inputACubes, inputBCubes, inputAddCubes): + inputLoadSchedule.append({"A": a, "B": b, "C": c}) + + for out in outputCubes: + outputLoadSchedule.append({"data_out": out}) + + schedule = TilingSchedule(inputBaseOffsets, outputBaseOffsets, inputLoadSchedule, outputLoadSchedule) + + return VariableReplacementScheme(replacements, replacementTypes), 
schedule diff --git a/Deeploy/Targets/PULPOpen/Tiler.py b/Deeploy/Targets/PULPOpen/Tiler.py index 26fd2518..20c671f9 100644 --- a/Deeploy/Targets/PULPOpen/Tiler.py +++ b/Deeploy/Targets/PULPOpen/Tiler.py @@ -40,15 +40,15 @@ from Deeploy.Targets.Generic.TileConstraints.TransposeTileConstraint import TransposeTileConstraint from Deeploy.Targets.Generic.TileConstraints.UnaryTileConstraint import UnaryTileConstraint from Deeploy.Targets.Generic.TileConstraints.UntiledTileConstraint import UntiledTileConstraint -from Deeploy.Targets.PULPOpen.Bindings import PULPConcatBindings, PULPiHardswishBindings, PULPiRMSNormBindings, \ - PULPiRQSGELUBindings, PULPMatMulBinding, PULPMaxPool2DBindings, PULPMulBindings, PULPRQAddBindings, \ - PULPRQSBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, PULPRQSiHardswishBindings, \ - PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSoftmaxBindings, PULPTransposeBindings, \ - PULPUniformRQSBindings, SimpleTransformer +from Deeploy.Targets.PULPOpen.Bindings import PULPConcatBindings, PULPFloatGEMMBindings, PULPiHardswishBindings, \ + PULPiRMSNormBindings, PULPiRQSGELUBindings, PULPMatMulBinding, PULPMaxPool2DBindings, PULPMulBindings, \ + PULPRQAddBindings, PULPRQSBindings, PULPRQSConv2DBindings, PULPRQSDWConv2DBindings, PULPRQSGEMMBindings, \ + PULPRQSiHardswishBindings, PULPRQSMatrixVecBindings, PULPRQSTallGEMMBindings, PULPSoftmaxBindings, \ + PULPTransposeBindings, PULPUniformRQSBindings, SimpleTransformer from Deeploy.Targets.PULPOpen.TileConstraints.ConvTileConstraint import Conv2DTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.DWConvTileConstraint import DWConv2DTileConstraint -from Deeploy.Targets.PULPOpen.TileConstraints.GEMMTileConstraint import GEMMTileConstraint, MatrixVecTileConstraint, \ - TallGEMMTileConstraint +from Deeploy.Targets.PULPOpen.TileConstraints.GEMMTileConstraint import FloatGEMMTileConstraint, GEMMTileConstraint, \ + MatrixVecTileConstraint, TallGEMMTileConstraint 
from Deeploy.Targets.PULPOpen.TileConstraints.iSoftmaxTileConstraint import iSoftmaxTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.MatMulTileConstraint import MatMulTileConstraint from Deeploy.Targets.PULPOpen.TileConstraints.MaxPoolTileConstraint import MaxPoolTileConstraint @@ -64,6 +64,9 @@ PULPRQSGEMMTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPRQSGEMMBindings, tileConstraint = GEMMTileConstraint()) +PULPFPGEMMTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPFloatGEMMBindings, + tileConstraint = FloatGEMMTileConstraint()) + PULPRQSMatrixVecTilingReadyBindings = TilingReadyNodeBindings(nodeBindings = PULPRQSMatrixVecBindings, tileConstraint = MatrixVecTileConstraint()) diff --git a/DeeployTest/Platforms/Siracusa/src/deeploytest.c b/DeeployTest/Platforms/Siracusa/src/deeploytest.c index c111055a..98b7f501 100644 --- a/DeeployTest/Platforms/Siracusa/src/deeploytest.c +++ b/DeeployTest/Platforms/Siracusa/src/deeploytest.c @@ -88,7 +88,8 @@ void main(void) { #ifndef CI printf("Output:\r\n"); #endif - int32_t diff, tot_err, tot_tested; + float32_t diff, expected_float, actual_float; + uint32_t tot_err, tot_tested; tot_err = 0; tot_tested = 0; char *compbuf; @@ -102,22 +103,18 @@ void main(void) { compbuf = DeeployNetwork_outputs[buf]; } - for (int i = 0; i < DeeployNetwork_outputs_bytes[buf]; i++) { - diff = ((char *)testOutputVector[buf])[i] - ((char *)compbuf)[i]; + for (int i = 0; i < DeeployNetwork_outputs_bytes[buf] / sizeof(float32_t); i++) { tot_tested++; - if (diff) { + expected_float = ((float32_t *)testOutputVector[buf])[i]; + actual_float = ((float32_t *)compbuf)[i]; + diff = expected_float - actual_float; + if (diff < -1e-5 || diff > 1e-5) { tot_err += 1; #ifndef CI - printf("Expected: %i\t\t", ((int8_t *)testOutputVector[buf])[i]); - printf("Actual: %i \t\t", ((int8_t *)compbuf)[i]); + printf("Expected: %f\t\t", expected_float); + printf("Actual: %f \t\t", actual_float); + printf("Diff: %f at Index %u 
\r\n", diff, i); #endif -#ifndef CI - printf("Diff: %i at Index %u \r\n", diff, i); -#endif - } else { - /* #ifndef CI */ - /* printf("\r\n"); */ - /* #endif */ } } if (DeeployNetwork_outputs[buf] < 0x1000000) { diff --git a/DeeployTest/Tests/testFloatGEMM/inputs.npz b/DeeployTest/Tests/testFloatGEMM/inputs.npz index fae7083c..47c86bd2 100644 Binary files a/DeeployTest/Tests/testFloatGEMM/inputs.npz and b/DeeployTest/Tests/testFloatGEMM/inputs.npz differ diff --git a/DeeployTest/Tests/testFloatGEMM/network.onnx b/DeeployTest/Tests/testFloatGEMM/network.onnx index 371c5629..6ce10317 100644 Binary files a/DeeployTest/Tests/testFloatGEMM/network.onnx and b/DeeployTest/Tests/testFloatGEMM/network.onnx differ diff --git a/DeeployTest/Tests/testFloatGEMM/outputs.npz b/DeeployTest/Tests/testFloatGEMM/outputs.npz index 83c13f9d..4d48f842 100644 Binary files a/DeeployTest/Tests/testFloatGEMM/outputs.npz and b/DeeployTest/Tests/testFloatGEMM/outputs.npz differ diff --git a/DeeployTest/testMVP.py b/DeeployTest/testMVP.py index a2e19717..8bb61389 100644 --- a/DeeployTest/testMVP.py +++ b/DeeployTest/testMVP.py @@ -186,7 +186,7 @@ def setupDeployer(graph: gs.Graph, tensors = graph.tensors() # Load as int64 and infer types later - test_inputs = [inputs[x].reshape(-1).astype(np.int64) for x in inputs.files] + test_inputs = [inputs[x].reshape(-1).astype(np.float64) for x in inputs.files] platform, signProp = mapPlatform(args.platform) @@ -325,8 +325,8 @@ def setupDeployer(graph: gs.Graph, test_inputs, test_outputs, graph = generateDebugConfig(inputs, outputs, activations, graph) else: # Load as int64 and infer types later - test_inputs = [inputs[x].reshape(-1).astype(np.int64) for x in inputs.files] - test_outputs = [outputs[x].reshape(-1).astype(np.int64) for x in outputs.files] + test_inputs = [inputs[x].reshape(-1).astype(np.float64) for x in inputs.files] + test_outputs = [outputs[x].reshape(-1).astype(np.float64) for x in outputs.files] # WIESEP: Hack to get CI running 
because only one specific array is used if "WaveFormer" in args.dir: