From 24841e7a9fbc494b54920dcbe07678b8c0e138ca Mon Sep 17 00:00:00 2001 From: 946166920 <412384199@qq.com> Date: Tue, 20 Feb 2024 11:19:54 +0800 Subject: [PATCH] MNN_NPU IR op modify --- source/backend/hiai/backend/NPUBackend.cpp | 71 ++-- source/backend/hiai/backend/NPUBackend.hpp | 35 ++ .../backend/hiai/execution/NPUActivation.cpp | 85 ++++- .../backend/hiai/execution/NPUActivation.hpp | 1 + source/backend/hiai/execution/NPUArgMax.cpp | 2 +- .../backend/hiai/execution/NPUBatchMatMul.cpp | 93 +++++ .../backend/hiai/execution/NPUBatchMatMul.hpp | 28 ++ source/backend/hiai/execution/NPUBinary.cpp | 355 ++++++++---------- source/backend/hiai/execution/NPUBinary.hpp | 11 +- .../backend/hiai/execution/NPUBroadCastTo.cpp | 50 +++ .../backend/hiai/execution/NPUBroadCastTo.hpp | 27 ++ source/backend/hiai/execution/NPUCast.cpp | 48 +-- source/backend/hiai/execution/NPUConcat.cpp | 17 +- .../hiai/execution/NPUConvertTensor.cpp | 65 ++-- .../hiai/execution/NPUConvertTensor.hpp | 2 - .../backend/hiai/execution/NPUConvolution.cpp | 99 +++-- .../execution/NPUConvolutionDepthwise.cpp | 26 +- source/backend/hiai/execution/NPUCrop.cpp | 49 +++ source/backend/hiai/execution/NPUCrop.hpp | 25 ++ .../hiai/execution/NPUDeconvolution.cpp | 34 +- .../execution/NPUDeconvolutionDepthwise.cpp | 135 +++++++ .../execution/NPUDeconvolutionDepthwise.hpp | 32 ++ .../hiai/execution/NPUDepthToSpace.cpp | 15 +- source/backend/hiai/execution/NPUEltwise.cpp | 94 +++-- .../backend/hiai/execution/NPUExpandDims.cpp | 36 +- source/backend/hiai/execution/NPUFlatten.cpp | 36 ++ source/backend/hiai/execution/NPUFlatten.hpp | 25 ++ source/backend/hiai/execution/NPUGatherV2.cpp | 91 ++--- source/backend/hiai/execution/NPUInterp.cpp | 11 +- source/backend/hiai/execution/NPULRN.cpp | 50 +++ source/backend/hiai/execution/NPULRN.hpp | 25 ++ .../backend/hiai/execution/NPULayerNorm.cpp | 75 ++++ .../backend/hiai/execution/NPULayerNorm.hpp | 30 ++ source/backend/hiai/execution/NPUMatmul.cpp | 155 +++++--- source/backend/hiai/execution/NPUPack.cpp | 17 +- source/backend/hiai/execution/NPUPack.hpp | 2 - source/backend/hiai/execution/NPUPadding.cpp | 11 +- source/backend/hiai/execution/NPUPermute.cpp | 41 ++ source/backend/hiai/execution/NPUPermute.hpp | 28 ++ source/backend/hiai/execution/NPUPooling.cpp | 27 +- .../backend/hiai/execution/NPUPooling3D.cpp | 87 +++++ .../backend/hiai/execution/NPUPooling3D.hpp | 25 ++ .../backend/hiai/execution/NPUReduction.cpp | 51 ++- .../backend/hiai/execution/NPUReduction.hpp | 7 - source/backend/hiai/execution/NPUReshape.cpp | 49 +-- source/backend/hiai/execution/NPUScale.cpp | 31 +- source/backend/hiai/execution/NPUScale.hpp | 1 + source/backend/hiai/execution/NPUSlice.cpp | 24 +- source/backend/hiai/execution/NPUSlice.hpp | 4 - source/backend/hiai/execution/NPUSliceTf.cpp | 17 +- source/backend/hiai/execution/NPUSoftmax.cpp | 47 +-- source/backend/hiai/execution/NPUSoftmax.hpp | 3 - source/backend/hiai/execution/NPUSqueeze.cpp | 63 +++- .../hiai/execution/NPUStridedSlice.cpp | 121 +++--- .../hiai/execution/NPUStridedSlice.hpp | 2 + source/backend/hiai/execution/NPUTile.cpp | 51 +++ source/backend/hiai/execution/NPUTile.hpp | 27 ++ source/backend/hiai/execution/NPUTopKV2.cpp | 10 +- .../backend/hiai/execution/NPUTranspose.cpp | 29 +- source/backend/hiai/execution/NPUUnary.cpp | 43 ++- 60 files changed, 1913 insertions(+), 838 deletions(-) create mode 100644 source/backend/hiai/execution/NPUBatchMatMul.cpp create mode 100644 source/backend/hiai/execution/NPUBatchMatMul.hpp create mode 100644 
source/backend/hiai/execution/NPUBroadCastTo.cpp create mode 100644 source/backend/hiai/execution/NPUBroadCastTo.hpp create mode 100644 source/backend/hiai/execution/NPUCrop.cpp create mode 100644 source/backend/hiai/execution/NPUCrop.hpp create mode 100644 source/backend/hiai/execution/NPUDeconvolutionDepthwise.cpp create mode 100644 source/backend/hiai/execution/NPUDeconvolutionDepthwise.hpp create mode 100644 source/backend/hiai/execution/NPUFlatten.cpp create mode 100644 source/backend/hiai/execution/NPUFlatten.hpp create mode 100644 source/backend/hiai/execution/NPULRN.cpp create mode 100644 source/backend/hiai/execution/NPULRN.hpp create mode 100644 source/backend/hiai/execution/NPULayerNorm.cpp create mode 100644 source/backend/hiai/execution/NPULayerNorm.hpp create mode 100644 source/backend/hiai/execution/NPUPermute.cpp create mode 100644 source/backend/hiai/execution/NPUPermute.hpp create mode 100644 source/backend/hiai/execution/NPUPooling3D.cpp create mode 100644 source/backend/hiai/execution/NPUPooling3D.hpp create mode 100644 source/backend/hiai/execution/NPUTile.cpp create mode 100644 source/backend/hiai/execution/NPUTile.hpp diff --git a/source/backend/hiai/backend/NPUBackend.cpp b/source/backend/hiai/backend/NPUBackend.cpp index cd4a0008a..1b4f45fca 100644 --- a/source/backend/hiai/backend/NPUBackend.cpp +++ b/source/backend/hiai/backend/NPUBackend.cpp @@ -231,8 +231,20 @@ namespace MNN { if (isInput && mGrapMap.find(inputIndex) == mGrapMap.end()) { auto opName = string("input") + to_string(inputIndex); shared_ptr data(new hiai::op::Data(opName)); - auto shape = tensorShapeFormat(inputTensor); - ge::TensorDesc desc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT); + vector dims; + for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) { + dims.push_back(inputTensor->buffer().dim[i].extent); + } + ge::TensorDesc desc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT); + if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN_DATA_FORMAT::MNN_DATA_FORMAT_NHWC) { + desc.SetFormat(ge::FORMAT_NHWC); + } + if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) { + desc.SetDataType(ge::DT_INT32); + } + if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) { + desc.SetDataType(ge::DT_INT64); + } data->update_input_desc_x(desc); // map vector, string>> ops; @@ -248,19 +260,25 @@ namespace MNN { shared_ptr mConst(new hiai::op::Const(opName)); { ge::TensorPtr filter = std::make_shared(); - auto shape = tensorShapeFormat(inputTensor); - ge::TensorDesc fdesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT); - filter->SetTensorDesc(fdesc); - if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN::MNN_DATA_FORMAT_NCHW) { - filter->SetData((uint8_t *)inputTensor->host(), inputTensor->elementSize() * sizeof(float)); - mConst->set_attr_value(filter); - } else { - vector temp(inputTensor->elementSize(), 0); - NHWC2NCHW((float*)inputTensor->host(), (float*)temp.data(), shape[0], shape[1], shape[2]*shape[3]); - filter->SetData((uint8_t *)temp.data(), temp.size() * sizeof(float)); - mConst->set_attr_value(filter); + vector dims; + for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) { + dims.push_back(inputTensor->buffer().dim[i].extent); + } + ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT); + if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) { + fdesc.SetDataType(ge::DT_INT32); + } + if (inputTensor->getType().code == 
halide_type_int && inputTensor->getType().bits == 64) { + fdesc.SetDataType(ge::DT_INT64); } + filter->SetTensorDesc(fdesc); filter->SetData((uint8_t *)inputTensor->host(), inputTensor->elementSize() * sizeof(float)); + if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) { + filter->SetData((uint8_t *)inputTensor->host(), inputTensor->elementSize() * sizeof(int32_t)); + } + if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) { + filter->SetData((uint8_t *)inputTensor->host(), inputTensor->elementSize() * sizeof(int64_t)); + } mConst->set_attr_value(filter); } vector, string>> ops; @@ -339,14 +357,7 @@ namespace MNN { auto index = mInputMap.find((unsigned long)(const_cast(dstTensor))); MNN_ASSERT(index != mInputMap.end()); shared_ptr input = inputTensors[index->second]; - if(TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW - ||TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) { - memcpy(input->GetData(), srcTensor->host(), (size_t)input->GetSize()); - } else { - shared_ptr tmpTensor(new Tensor(dstTensor, Tensor::DimensionType::CAFFE, true)); - tensorConvert(srcTensor, tmpTensor.get()); - memcpy(input->GetData(), tmpTensor->host(), (size_t)tmpTensor->size()); - } + memcpy(input->GetData(), srcTensor->host(), (size_t)input->GetSize()); } else if(isOutputCopy){ int index; bool flag = false; @@ -361,18 +372,8 @@ namespace MNN { return; } shared_ptr output = outputTensors[index]; - if(TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW - ||TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) { - memcpy(dstTensor->buffer().host, output->GetData(), (size_t)output->GetSize()); - } else { - auto tmpShape = tensorShapeFormat(srcTensor); - vector srcShape = {(int)tmpShape[0],(int)tmpShape[1],(int)tmpShape[2],(int)tmpShape[3]}; - shared_ptr tmpTensor(Tensor::create(srcShape,halide_type_of(), - (void*)(output->GetData()), - Tensor::DimensionType::CAFFE)); - auto shape = output->GetTensorDesc(); - tensorConvert(tmpTensor.get(), dstTensor); - } + Tensor* tmpTensor = const_cast(dstTensor); + memcpy(tmpTensor->buffer().host, output->GetData(), (size_t)output->GetSize()); } #ifdef HIAI_DEBUG ATrace_endSection(); @@ -420,7 +421,7 @@ namespace MNN { model->SetGraph(graph); hiai::ModelBuildOptions buildOptions; - + buildOptions.formatMode = hiai::FormatMode::USE_ORIGIN; std::ifstream file("quant_param", std::ios::binary | std::ios::ate); if (!file.is_open()) { MNN_PRINT("no quant_param config file, build non-quantized model.\n"); @@ -507,7 +508,7 @@ namespace MNN { void NPUBackend::setOutputOps(const Op *op, vector>&& HIAI_op, const std::vector &outputs){ - if(op->type() == OpType_Slice){ + if(op->type() == OpType_Slice || op->type() == OpType_TopKV2){ for (size_t i = 0; i < op->outputIndexes()->size(); i++){ auto index = op->outputIndexes()->data()[i]; mSclipMap[index] = i; diff --git a/source/backend/hiai/backend/NPUBackend.hpp b/source/backend/hiai/backend/NPUBackend.hpp index 3c8df1db7..4ee14a513 100644 --- a/source/backend/hiai/backend/NPUBackend.hpp +++ b/source/backend/hiai/backend/NPUBackend.hpp @@ -43,6 +43,41 @@ namespace MNN { typedef void *(*fp_ATrace_endSection) (void); #endif void NHWC2NCHW(const float* source, float* dest, int b, int c, int area); + + static ge::DataType mapDataType(DataType src) { + ge::DataType retVal = ge::DataType::DT_UNDEFINED; + switch (src) { + case DataType_DT_FLOAT: + retVal = 
ge::DataType::DT_FLOAT; + break; + case DataType_DT_DOUBLE: + retVal = ge::DataType::DT_DOUBLE; + break; + case DataType_DT_INT32: + retVal = ge::DataType::DT_INT32; + break; + case DataType_DT_UINT8: + retVal = ge::DataType::DT_UINT8; + break; + case DataType_DT_INT16: + retVal = ge::DataType::DT_INT16; + break; + case DataType_DT_INT8: + retVal = ge::DataType::DT_INT8; + break; + case DataType_DT_INT64: + retVal = ge::DataType::DT_INT64; + break; + case DataType_DT_VARIANT: + retVal = ge::DataType::DT_FLOAT; + break; + default: + MNN_ASSERT(false); + printf("cast Datatype : %d \n", src); + break; + } + return retVal; + } inline std::vector tensorShapeFormat(const Tensor *input, const Tensor *broadCastInput=nullptr) { auto dimSize = input->buffer().dimensions; if(broadCastInput != nullptr) { diff --git a/source/backend/hiai/execution/NPUActivation.cpp b/source/backend/hiai/execution/NPUActivation.cpp index f5559d98a..14b3b8412 100644 --- a/source/backend/hiai/execution/NPUActivation.cpp +++ b/source/backend/hiai/execution/NPUActivation.cpp @@ -21,39 +21,84 @@ NPUActivation::NPUActivation(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); auto opName = mOp->name()->str(); - - - auto xOp = mNpuBackend->getInputOps(mOp); - - if(mType == 5){ - shared_ptr prelu(new hiai::op::PRelu(opName + "_prelu")); - auto slopePtr = mOp->main_as_PRelu()->slope()->data(); - auto slopeSize = mOp->main_as_PRelu()->slope()->size(); - - mConst_w = hiai::op::Const(opName + "_w_const"); - { - ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW, - ge::DT_FLOAT); // in o h w ? + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + xOp = iops.back().first; + if (mType == OpType_PReLU && mOp->main_as_PRelu()->slope() != nullptr) { + if (mOp->main_as_PRelu()->slope()->size() == 1) { + const float* slopePtr = mOp->main_as_PRelu()->slope()->data(); + shared_ptr relu(new hiai::op::Activation(opName + "_relu")); + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*relu).set_input_x(*xOp.get()); + } else { + (*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } + (*relu) + .set_attr_coef(.000000) + .set_attr_negative_slope(*slopePtr) + .set_attr_mode(mType); + mNpuBackend->setOutputOps(mOp, {relu}, outputs); + } else { + shared_ptr prelu(new hiai::op::PRelu(opName + "_prelu")); + auto slopePtr = mOp->main_as_PRelu()->slope()->data(); + auto slopeSize = mOp->main_as_PRelu()->slope()->size(); + mConst_w = hiai::op::Const(opName + "_w_const"); + ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT); ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); filter->SetData((uint8_t *)slopePtr, slopeSize * sizeof(float)); mConst_w.set_attr_value(filter); + if (inputs[0]->buffer().dimensions < 4) { + std::vector shape; + for (int32_t i = 0; i < inputs[0]->buffer().dimensions; i++) { + shape.push_back(inputs[0]->buffer().dim[i].extent); + } + for (int32_t i = inputs[0]->buffer().dimensions; i < 4; i++) { + shape.push_back(1); + } + shapeConst = hiai::op::Const(opName +"_reshapeConst"); + { + ge::TensorDesc fdesc(ge::Shape({static_cast(shape.size())}), ge::FORMAT_NCHW, ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)shape.data(), shape.size() * sizeof(int32_t)); + shapeConst.set_attr_value(filter); + } + shared_ptr 
reshape(new hiai::op::Reshape(opName + "_reshape")); + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*reshape).set_input_x(*xOp.get()); + } else { + (*reshape).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } + (*reshape).set_input_shape(shapeConst); + (*prelu).set_input_x(*reshape.get()).set_input_weight(mConst_w); + mNpuBackend->setOutputOps(mOp, {reshape, prelu}, outputs); + } else { + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*prelu).set_input_x(*xOp.get()); + } else { + (*prelu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } + (*prelu).set_input_weight(mConst_w); + mNpuBackend->setOutputOps(mOp, {prelu}, outputs); + } } - - (*prelu) - .set_input_x(*xOp.get()).set_input_weight(mConst_w); - mNpuBackend->setOutputOps(mOp, {prelu}, outputs); }else{ float slope = 0.0; if (mOp->type() == OpType_ReLU) { slope = mOp->main_as_Relu()->slope(); - mType = 5; + if (slope != 0.0) { + mType = 5; + } } - shared_ptr relu(new hiai::op::Activation(opName + "_relu")); + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*relu).set_input_x(*xOp.get()); + } else { + (*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } (*relu) - .set_input_x(*xOp.get()) .set_attr_coef(.000000) .set_attr_negative_slope(slope) .set_attr_mode(mType); diff --git a/source/backend/hiai/execution/NPUActivation.hpp b/source/backend/hiai/execution/NPUActivation.hpp index 118e4bd40..9b9f137bc 100644 --- a/source/backend/hiai/execution/NPUActivation.hpp +++ b/source/backend/hiai/execution/NPUActivation.hpp @@ -20,6 +20,7 @@ class NPUActivation : public NPUCommonExecution { virtual ~NPUActivation() = default; private: hiai::op::Const mConst_w; + hiai::op::Const shapeConst; int mType; }; diff --git a/source/backend/hiai/execution/NPUArgMax.cpp b/source/backend/hiai/execution/NPUArgMax.cpp index a75ec3fc8..8cfda35ea 100644 --- a/source/backend/hiai/execution/NPUArgMax.cpp +++ b/source/backend/hiai/execution/NPUArgMax.cpp @@ -28,7 +28,7 @@ ErrorCode NPUArgMax::onResize(const std::vector &inputs, const std::ve // om input weight const op mConst_axis = hiai::op::Const(opName + "_w_const"); { - auto aixs = axisFormat(inputs[0], argMaxParam->axis()); + auto aixs = argMaxParam->axis(); ge::TensorDesc fdesc(ge::Shape({1}),ge::DT_INT32); ge::TensorPtr axis = std::make_shared(); axis->SetTensorDesc(fdesc); diff --git a/source/backend/hiai/execution/NPUBatchMatMul.cpp b/source/backend/hiai/execution/NPUBatchMatMul.cpp new file mode 100644 index 000000000..c593e70b7 --- /dev/null +++ b/source/backend/hiai/execution/NPUBatchMatMul.cpp @@ -0,0 +1,93 @@ +// +// NPUBatchMatMul.cpp +// MNN +// +// Created by MNN on b'2020/10/15'. 
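For reference, the dimension handling in the PRelu branch above (collect the MNN tensor shape, pad it to four dimensions, wrap it in an INT32 const that feeds hiai::op::Reshape) can be summarized by the following sketch. buildShapeConst is a hypothetical helper written for illustration only, assuming the headers already pulled in by NPUBackend.hpp; template arguments, which the patch text does not preserve, are restored from the surrounding usage.

// Hypothetical helper (not part of the patch): pad an MNN tensor's shape to 4-D
// and wrap it in a hiai::op::Const holding an INT32 shape tensor, mirroring the
// PRelu reshape branch above. Assumes NPUBackend.hpp provides MNN::Tensor and
// the HiAI graph types.
static hiai::op::Const buildShapeConst(const std::string& name, const MNN::Tensor* t) {
    std::vector<int32_t> shape;
    for (int32_t i = 0; i < t->buffer().dimensions; i++) {
        shape.push_back(t->buffer().dim[i].extent);
    }
    while (shape.size() < 4) {
        shape.push_back(1); // pad trailing dimensions with 1
    }
    hiai::op::Const shapeConst(name);
    ge::TensorDesc desc(ge::Shape({static_cast<int64_t>(shape.size())}), ge::FORMAT_NCHW, ge::DT_INT32);
    ge::TensorPtr value = std::make_shared<ge::Tensor>();
    value->SetTensorDesc(desc);
    value->SetData((uint8_t*)shape.data(), shape.size() * sizeof(int32_t));
    shapeConst.set_attr_value(value);
    return shapeConst;
}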
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPUBatchMatMul.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPUBatchMatMul::NPUBatchMatMul(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) { + auto opName = mOp->name()->str(); + + bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + + Tensor* input = nullptr; + if (isConst0 && !isConst1){ + input = inputs[0]; + } + if (!isConst0 && isConst1){ + input = inputs[1]; + } + if (input != nullptr) { + mConst = ge::op::Const(opName + "_w_const"); + ge::TensorPtr filter = std::make_shared(); + vector dims; + for (int32_t i = 0; i < input->buffer().dimensions; i++) { + dims.push_back(input->buffer().dim[i].extent); + } + ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT); + if (input->getType().code == halide_type_int && input->getType().bits == 32) { + fdesc.SetDataType(ge::DT_INT32); + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(int32_t)); + } else { + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(float)); + } + filter->SetTensorDesc(fdesc); + mConst.set_attr_value(filter); + } + +} + +ErrorCode NPUBatchMatMul::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + auto opName = mOp->name()->str(); + bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + auto param = mOp->main_as_BatchMatMulParam(); + shared_ptr batchMatMul(new hiai::op::BatchMatMul(opName)); + if (isConst0 && !isConst1) { + auto inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; + auto xOp1 = iops1.back().first; + (*batchMatMul) + .set_input_x1(mConst) + .set_input_x2(*xOp1.get()) + .set_attr_adj_x1(param->adjX()) + .set_attr_adj_x2(param->adjY()); + } else if (!isConst0 && isConst1) { + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + auto xOp = iops.back().first; + (*batchMatMul) + .set_input_x1(*xOp.get()) + .set_input_x2(mConst) + .set_attr_adj_x1(param->adjX()) + .set_attr_adj_x2(param->adjY()); + } else { + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + auto xOp = iops.back().first; + auto inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; + auto xOp1 = iops1.back().first; + (*batchMatMul) + .set_input_x1(*xOp.get()) + .set_input_x2(*xOp1.get()) + .set_attr_adj_x1(param->adjX()) + .set_attr_adj_x2(param->adjY()); + } + mNpuBackend->setOutputOps(mOp, {batchMatMul}, outputs); + return NO_ERROR; +} + +NPUCreatorRegister> __BatchMatMul_op(OpType_BatchMatMul); + +} // namespace MNN \ No newline at end of file diff --git a/source/backend/hiai/execution/NPUBatchMatMul.hpp b/source/backend/hiai/execution/NPUBatchMatMul.hpp new file mode 100644 index 000000000..c44e480dc --- /dev/null +++ b/source/backend/hiai/execution/NPUBatchMatMul.hpp @@ -0,0 +1,28 @@ +// +// NPUBatchMatMul.hpp +// MNN +// +// Created by MNN on b'2020/10/15'. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef NPUDEMO_NPUBatchMatMul_HPP +#define NPUDEMO_NPUBatchMatMul_HPP + +#include "NPUCommonExecution.hpp" +#include "NPUBackend.hpp" + +namespace MNN { + +class NPUBatchMatMul : public NPUCommonExecution { +public: + NPUBatchMatMul(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUBatchMatMul() = default; + +private: + ge::op::Const mConst; +}; +} // namespace MNN + +#endif // NPUDEMO_NPUBatchMatMul_HPP diff --git a/source/backend/hiai/execution/NPUBinary.cpp b/source/backend/hiai/execution/NPUBinary.cpp index 83bd43b4c..8412be276 100644 --- a/source/backend/hiai/execution/NPUBinary.cpp +++ b/source/backend/hiai/execution/NPUBinary.cpp @@ -13,133 +13,117 @@ using namespace std; namespace MNN { +template +void NPUBinary::BinaryCastIR(string opName, hiai::Operator& input0, hiai::Operator& input1, + const std::vector& outputs, int activationType, shared_ptr binary) { + shared_ptr castTOp(new hiai::op::CastT(opName + "castTOp")); + shared_ptr castTOp1(new hiai::op::CastT(opName + "castTOp1")); + shared_ptr castTOpAfter(new hiai::op::CastT(opName + "castTOpAfter")); + auto binaryParam = mOp->main_as_BinaryOp(); + auto t = binaryParam->T(); + if (flag0) { + (*castTOp) + .set_input_x(input0.GetOutput(mNpuBackend->mSclipMap[inputIndex0])) + .set_attr_dst_dtype(0); + (*binary).set_input_x1(*castTOp.get()); + } else { + (*castTOp) + .set_input_x(input0) + .set_attr_dst_dtype(0); + (*binary).set_input_x1(*castTOp.get()); + } + if (flag1) { + (*castTOp1) + .set_input_x(input1.GetOutput(mNpuBackend->mSclipMap[inputIndex1])) + .set_attr_dst_dtype(0); + (*binary).set_input_x2(*castTOp1.get()); + } else { + (*castTOp1) + .set_input_x(input1) + .set_attr_dst_dtype(0); + (*binary).set_input_x2(*castTOp1.get()); + } + (*castTOpAfter) + .set_input_x(*binary.get()) + .set_attr_dst_dtype(mapDataType(t)); + if(activationType == 1) { + shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); + (*binary_activation) + .set_input_x(*castTOpAfter.get()) + .set_attr_mode(1); + mNpuBackend->setOutputOps(mOp, {castTOp, castTOp1, binary, castTOpAfter, binary_activation}, outputs); + } else { + mNpuBackend->setOutputOps(mOp, {castTOp, castTOp1, binary, castTOpAfter}, outputs); + } +} +template +void NPUBinary::BinaryIR(string opName, hiai::Operator& input0, hiai::Operator& input1, + const std::vector& outputs, int activationType, shared_ptr binary) { + if (flag0) { + (*binary).set_input_x1(input0.GetOutput(mNpuBackend->mSclipMap[inputIndex0])); + } else { + (*binary).set_input_x1(input0); + } + if (flag1) { + (*binary).set_input_x2(input1.GetOutput(mNpuBackend->mSclipMap[inputIndex1])); + } else { + (*binary).set_input_x2(input1); + } + if(activationType == 1) { + shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); + (*binary_activation) + .set_input_x(*binary.get()) + .set_attr_mode(1); + mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); + } else { + mNpuBackend->setOutputOps(mOp, {binary}, outputs); + } +} void NPUBinary::OpInsert(int binary_type, string opName, hiai::Operator& input0, hiai::Operator& input1, const std::vector &outputs, int activationType){ - if(binary_type == BinaryOpOperation_ADD) { + if (binary_type == BinaryOpOperation_ADD) { shared_ptr binary(new hiai::op::Add(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - - if(activationType == 1) { 
- shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - } else if(binary_type == BinaryOpOperation_MUL) { + BinaryCastIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_MUL) { shared_ptr binary(new hiai::op::Mul(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - if(activationType == 1) { - shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - } else if(binary_type == BinaryOpOperation_REALDIV) { + BinaryCastIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_REALDIV) { shared_ptr binary(new hiai::op::RealDiv(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - if(activationType == 1) { - shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - } else if(binary_type == BinaryOpOperation_SUB) { + BinaryCastIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_SUB) { shared_ptr binary(new hiai::op::Sub(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - if(activationType == 1) { - shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - } else if(binary_type == BinaryOpOperation_MINIMUM) { + BinaryCastIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_MINIMUM) { shared_ptr binary(new hiai::op::Minimum(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - if(activationType == 1) { - shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - } else if(binary_type == BinaryOpOperation_MAXIMUM) { + BinaryIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_MAXIMUM) { shared_ptr binary(new hiai::op::Maximum(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - if(activationType == 1) { - shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - } else if(binary_type == BinaryOpOperation_EQUAL) { + BinaryIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_EQUAL) { shared_ptr binary(new 
hiai::op::Equal(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - if(activationType == 1) { - shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - } else if(binary_type == BinaryOpOperation_LESS_EQUAL) { + BinaryIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_LESS_EQUAL) { shared_ptr binary(new hiai::op::LessEqual(opName)); - (*binary) - .set_input_x1(input0) - .set_input_x2(input1); - if(activationType == 1) { - shared_ptr binary_activation(new hiai::op::Activation(opName + "_Relu")); - (*binary_activation) - .set_input_x(*binary.get()) - .set_attr_mode(1); - - mNpuBackend->setOutputOps(mOp, {binary, binary_activation}, outputs); - } else { - mNpuBackend->setOutputOps(mOp, {binary}, outputs); - } - }else{ + BinaryIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_POW) { + shared_ptr binary(new hiai::op::Pow(opName)); + BinaryIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_LESS) { + shared_ptr binary(new hiai::op::Less(opName)); + BinaryIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_MOD) { + shared_ptr binary(new hiai::op::FloorMod(opName)); + BinaryCastIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_SquaredDifference) { + shared_ptr binary(new hiai::op::SquaredDifference(opName)); + BinaryCastIR(opName, input0, input1, outputs, activationType, binary); + } else if (binary_type == BinaryOpOperation_GREATER) { + shared_ptr binary(new hiai::op::Greater(opName)); + BinaryIR(opName, input0, input1, outputs, activationType, binary); + } else { MNN_ERROR("npu binary not support type : %d \n", binary_type); MNN_ASSERT(false); } @@ -149,112 +133,77 @@ NPUBinary::NPUBinary(MNN::Backend *b, const MNN::Op *op, const std::vectorname()->str(); bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; - - vector, string>> ops; auto binary_type = mOp->main_as_BinaryOp()->opType(); - - if(!isConst0 && isConst1){ - // - auto inputIndex0 = mOp->inputIndexes()->data()[0]; - auto iops0 = mNpuBackend->mGrapMap[inputIndex0]; // x - auto xOp0 = iops0.back().first; - auto input1 = inputs[1]; - auto input0 = inputs[1]; - // om input weight const op - mConst = hiai::op::Const(opName + "_w_const"); - { - ge::TensorPtr filter = std::make_shared(); - - auto shape = tensorShapeFormat(input1,inputs[0]); - ge::TensorDesc fdesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT); - filter->SetTensorDesc(fdesc); - if (TensorUtils::getDescribe(inputs[1])->dimensionFormat == MNN::MNN_DATA_FORMAT_NCHW) { - filter->SetData((uint8_t *)input1->host(), input1->elementSize() * sizeof(float)); - mConst.set_attr_value(filter); - }else{ - vector temp(input1->elementSize(), 0); - NHWC2NCHW((float*)input1->host(), (float*)temp.data(), shape[0], shape[1], shape[2]*shape[3]); - filter->SetData((uint8_t *)temp.data(), temp.size() * sizeof(float)); - mConst.set_attr_value(filter); - } - - filter->SetData((uint8_t *)input1->host(), input1->elementSize() * 
sizeof(float)); - mConst.set_attr_value(filter); + auto len = mOp->inputIndexes()->size(); + Tensor* input = nullptr; + if(isConst0 && !isConst1) { + input = inputs[0]; + } else if (!isConst0 && isConst1) { + input = inputs[1]; + } + mConst = hiai::op::Const(opName + "_w_const"); + if(input != nullptr) { + ge::TensorPtr filter = std::make_shared(); + vector dims; + for (int32_t i = 0; i < input->buffer().dimensions; i++) { + dims.push_back(input->buffer().dim[i].extent); } - - }else if(isConst0 && !isConst1){ - // - auto inputIndex1 = mOp->inputIndexes()->data()[1]; - auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x - auto xOp1 = iops1.back().first; - auto input0 = inputs[0]; - auto input1 = inputs[1]; - // om input weight const op - mConst = hiai::op::Const(opName + "_w_const"); - { - ge::TensorPtr filter = std::make_shared(); - auto shape = tensorShapeFormat(input0); - ge::TensorDesc fdesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT); - filter->SetTensorDesc(fdesc); - if (TensorUtils::getDescribe(inputs[0])->dimensionFormat == MNN::MNN_DATA_FORMAT_NCHW) { - filter->SetData((uint8_t *)input0->host(), input0->elementSize() * sizeof(float)); - mConst.set_attr_value(filter); - }else{ - vector temp(input0->elementSize(), 0); - NHWC2NCHW((float*)input0->host(), (float*)temp.data(), shape[0], shape[1], shape[2]*shape[3]); - filter->SetData((uint8_t *)temp.data(), temp.size() * sizeof(float)); - mConst.set_attr_value(filter); - } - filter->SetData((uint8_t *)input0->host(), input0->elementSize() * sizeof(float)); - mConst.set_attr_value(filter); + ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT); + if (input->getType().code == halide_type_float) { + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(float)); } - + if (input->getType().code == halide_type_int && input->getType().bits == 32) { + fdesc.SetDataType(ge::DT_INT32); + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(int32_t)); + } + filter->SetTensorDesc(fdesc); + mConst.set_attr_value(filter); } } ErrorCode NPUBinary::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); - auto opName = mOp->name()->str(); bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; - - vector, string>> ops; auto binary_type = mOp->main_as_BinaryOp()->opType(); int activationType = mOp->main_as_BinaryOp()->activationType(); - if(!isConst0 && isConst1){ - // - auto inputIndex0 = mOp->inputIndexes()->data()[0]; - auto iops0 = mNpuBackend->mGrapMap[inputIndex0]; // x - auto xOp0 = iops0.back().first; - + flag0 = false; + flag1 = false; + if (!isConst0 && isConst1) { + inputIndex0 = mOp->inputIndexes()->data()[0]; + auto iops0 = mNpuBackend->mGrapMap[inputIndex0]; // x + auto xOp0 = iops0.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex0) != mNpuBackend->mSclipMap.end()) { + flag0 = true; + } + inputIndex1 = -1; OpInsert(binary_type, opName, *xOp0.get(), mConst, outputs, activationType); - }else if(isConst0 && !isConst1){ - // - auto inputIndex1 = mOp->inputIndexes()->data()[1]; - auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x - auto xOp1 = iops1.back().first; - - OpInsert(binary_type, opName, mConst, *xOp1.get(), outputs, activationType); - - }else{ - - // - auto inputIndex0 = mOp->inputIndexes()->data()[0]; - auto iops0 = mNpuBackend->mGrapMap[inputIndex0]; // x - auto 
xOp0 = iops0.back().first; - - - auto inputIndex1 = mOp->inputIndexes()->data()[1]; - auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x - auto xOp1 = iops1.back().first; - + } else if(isConst0 && !isConst1) { + inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x + auto xOp1 = iops1.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex1) != mNpuBackend->mSclipMap.end()) { + flag1 = true; + } + inputIndex0 = -1; + OpInsert(binary_type, opName, mConst, *xOp1.get(), outputs, activationType); + } else { + inputIndex0 = mOp->inputIndexes()->data()[0]; + auto iops0 = mNpuBackend->mGrapMap[inputIndex0]; // x + auto xOp0 = iops0.back().first; + inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x + auto xOp1 = iops1.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex0) != mNpuBackend->mSclipMap.end()) { + flag0 = true; + } + if (mNpuBackend->mSclipMap.find(inputIndex1) != mNpuBackend->mSclipMap.end()) { + flag1 = true; + } OpInsert(binary_type, opName, *xOp0.get(), *xOp1.get(), outputs, activationType); - } - - auto index = mOp->outputIndexes()->data()[0]; return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUBinary.hpp b/source/backend/hiai/execution/NPUBinary.hpp index 9b8a6ac34..bb813f394 100644 --- a/source/backend/hiai/execution/NPUBinary.hpp +++ b/source/backend/hiai/execution/NPUBinary.hpp @@ -16,6 +16,12 @@ namespace MNN { class NPUBinary : public NPUCommonExecution { public: + template + void BinaryCastIR(string opName, hiai::Operator& input0, hiai::Operator& input1, + const std::vector& outputs, int activationType, shared_ptr binary); + template + void BinaryIR(string opName, hiai::Operator& input0, hiai::Operator& input1, + const std::vector& outputs, int activationType, shared_ptr binary); void OpInsert(int binary_type, string opName, hiai::Operator& input0, hiai::Operator& input1, const std::vector &outputs, int activationType); @@ -25,7 +31,10 @@ class NPUBinary : public NPUCommonExecution { private: hiai::op::Const mConst; - + bool flag0 = false; + bool flag1 = false; + int32_t inputIndex0 = -1; + int32_t inputIndex1 = -1; }; } // namespace MNN diff --git a/source/backend/hiai/execution/NPUBroadCastTo.cpp b/source/backend/hiai/execution/NPUBroadCastTo.cpp new file mode 100644 index 000000000..ca2f5d7a1 --- /dev/null +++ b/source/backend/hiai/execution/NPUBroadCastTo.cpp @@ -0,0 +1,50 @@ +// +// NPUBroadCastTo.cpp +// MNN +// +// Created by MNN on 2019/09/19. 
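Most of the rewritten executions above repeat one wiring pattern: look up the HiAI operator that produces an MNN input tensor in mGrapMap, and, when that producer is a multi-output op recorded in mSclipMap (Slice or TopKV2), connect the specific output instead of the operator itself. A minimal sketch of that pattern, using a hypothetical wireInputX helper that is not part of the patch:

// Hypothetical illustration of the producer-lookup pattern used above.
// OpT is any HiAI op exposing set_input_x (Activation, CastT, Reshape, ...).
template <typename OpT>
static void wireInputX(MNN::NPUBackend* backend, int32_t tensorIndex, OpT& op) {
    auto iops = backend->mGrapMap[tensorIndex];     // producers registered for this tensor
    auto xOp  = iops.back().first;                  // the most recent producer
    auto it   = backend->mSclipMap.find(tensorIndex);
    if (it == backend->mSclipMap.end()) {
        op.set_input_x(*xOp.get());                 // single-output producer
    } else {
        op.set_input_x(xOp->GetOutput(it->second)); // pick the matching output of Slice/TopKV2
    }
}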
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPUBroadCastTo.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPUBroadCastTo::NPUBroadCastTo(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs) : MNN::NPUCommonExecution(b,op) { +} + +ErrorCode NPUBroadCastTo::onResize(const std::vector &inputs, const std::vector &outputs) { + auto opName = mOp->name()->str(); + bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + if (isConst1) { + auto depth = inputs[1]; + mConst_s = hiai::op::Const(opName + "_s_const"); + vector dims; + for (int32_t i = 0; i < depth->buffer().dimensions; i++) { + dims.push_back(depth->buffer().dim[i].extent); + } + ge::TensorPtr filter = std::make_shared(); + ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_INT32); + filter->SetData((uint8_t *)depth->host(), depth->elementSize() * sizeof(int32_t)); + filter->SetTensorDesc(fdesc); + mConst_s.set_attr_value(filter); + } + mNpuBackend->setNetworkInput(inputs, mOp); + shared_ptr broadCastTo(new hiai::op::BroadcastTo(opName)); + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + auto xOp = iops.back().first; + if (isConst1) { + (*broadCastTo).set_input_x(*xOp.get()).set_input_shape(mConst_s); + mNpuBackend->setOutputOps(mOp, {broadCastTo}, outputs); + return NO_ERROR; + } + return NOT_SUPPORT; +} + +NPUCreatorRegister> __BroadCastTo_op(OpType_BroadcastTo); + +} // namespace MNN \ No newline at end of file diff --git a/source/backend/hiai/execution/NPUBroadCastTo.hpp b/source/backend/hiai/execution/NPUBroadCastTo.hpp new file mode 100644 index 000000000..ba6c96604 --- /dev/null +++ b/source/backend/hiai/execution/NPUBroadCastTo.hpp @@ -0,0 +1,27 @@ +// +// NPUBroadCastTo.hpp +// MNN +// +// Created by MNN on 2019/09/19. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef NPUDEMO_NPUBroadCastTo_HPP +#define NPUDEMO_NPUBroadCastTo_HPP + +#include "NPUCommonExecution.hpp" + +namespace MNN { + +class NPUBroadCastTo : public NPUCommonExecution { +public: + NPUBroadCastTo(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUBroadCastTo() = default; +private: + hiai::op::Const mConst_s; +}; + +} // namespace MNN + +#endif // NPUDEMO_NPUBroadCastTo_HPP diff --git a/source/backend/hiai/execution/NPUCast.cpp b/source/backend/hiai/execution/NPUCast.cpp index f74c719d4..866234a3c 100644 --- a/source/backend/hiai/execution/NPUCast.cpp +++ b/source/backend/hiai/execution/NPUCast.cpp @@ -13,56 +13,26 @@ using namespace std; namespace MNN { -static ge::DataType mapDataType(DataType src) { - ge::DataType retVal = ge::DataType::DT_UNDEFINED; - switch (src) { - case DataType_DT_FLOAT: - retVal = ge::DataType::DT_FLOAT; - break; - case DataType_DT_DOUBLE: - retVal = ge::DataType::DT_DOUBLE; - break; - case DataType_DT_INT32: - retVal = ge::DataType::DT_INT32; - break; - case DataType_DT_UINT8: - retVal = ge::DataType::DT_UINT8; - break; - case DataType_DT_INT16: - retVal = ge::DataType::DT_INT16; - break; - case DataType_DT_INT8: - retVal = ge::DataType::DT_INT8; - break; - case DataType_DT_INT64: - retVal = ge::DataType::DT_INT64; - break; - default: - MNN_ASSERT(false); - printf("cast Datatype : %d \n", src); - break; - } - return retVal; -} - NPUCast::NPUCast(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs) : MNN::NPUCommonExecution(b,op) { } ErrorCode NPUCast::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); auto opName = mOp->name()->str(); - shared_ptr castTOp(new hiai::op::CastT(opName)); - auto xOp = mNpuBackend->getInputOps(mOp); auto castPara = mOp->main_as_CastParam(); - DataType srcT = castPara->srcT(); DataType dstT = castPara->dstT(); - (*castTOp) - .set_input_x(*xOp.get()) - .set_attr_src_dtype(mapDataType(srcT)) - .set_attr_dst_dtype(mapDataType(dstT)); + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) != mNpuBackend->mSclipMap.end()) { + (*castTOp).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } else { + (*castTOp).set_input_x(*xOp.get()); + } + (*castTOp).set_attr_dst_dtype(mapDataType(dstT)); mNpuBackend->setOutputOps(mOp, {castTOp}, outputs); return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUConcat.cpp b/source/backend/hiai/execution/NPUConcat.cpp index f4a27dfc9..bd596c579 100644 --- a/source/backend/hiai/execution/NPUConcat.cpp +++ b/source/backend/hiai/execution/NPUConcat.cpp @@ -23,21 +23,22 @@ ErrorCode NPUConcat::onResize(const std::vector &inputs, const std::ve auto param = mOp->main_as_Axis(); shared_ptr concatD(new hiai::op::ConcatD(opName)); - + auto xOp = mNpuBackend->getInputOps(mOp); auto inputSize = mOp->inputIndexes()->size(); - (*concatD).create_dynamic_input_x(inputSize) - .set_attr_concat_dim(axisFormat(inputs[0], param->axis())); + int32_t axis = param->axis(); + (*concatD).create_dynamic_input_x(inputSize).set_attr_concat_dim(axis); for (int i = 0; i < inputSize; ++i) { auto inputIndex = mOp->inputIndexes()->data()[i]; auto iops = mNpuBackend->mGrapMap[inputIndex]; // x - auto xOp = iops.back().first; 
- hiai::Operator *px = (hiai::Operator *)xOp.get(); - (*concatD).set_dynamic_input_x(i + 1, *px); + xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*concatD).set_dynamic_input_x(i + 1, *xOp.get()); + } else { + (*concatD).set_dynamic_input_x(i + 1, xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } } - mNpuBackend->setOutputOps(mOp, {concatD}, outputs); - return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUConvertTensor.cpp b/source/backend/hiai/execution/NPUConvertTensor.cpp index 93a7aa099..5c2e40c85 100644 --- a/source/backend/hiai/execution/NPUConvertTensor.cpp +++ b/source/backend/hiai/execution/NPUConvertTensor.cpp @@ -21,39 +21,44 @@ ErrorCode NPUConvertTensor::onResize(const std::vector &inputs, const auto opName = mOp->name()->str(); auto xOp = mNpuBackend->getInputOps(mOp); - //om input weight const op - auto shapeFormt = tensorShapeFormat(outputs[0]); - std::vector shapeDims (shapeFormt.begin(), shapeFormt.end()); - shapeConst = hiai::op::Const(opName + "_shape_const"); - { - ge::TensorDesc fdesc(ge::Shape({static_cast(shapeDims.size())}), - ge::FORMAT_NCHW, ge::DT_INT32); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)shapeDims.data(), shapeDims.size() * sizeof(int32_t)); - - shapeConst.set_attr_value(filter); - } + //om input weight const op + std::vector inputShape = inputs[0]->shape(); + std::vector outShape = outputs[0]->shape(); + std::vector> dims ={{0,1,2,3}, {0,2,3,1}, {0,3,1,2}, {0,1,2}, {0,2,1}, {0,1}, {1,0}}; - if (outputs[0]->buffer().dimensions==2) { //These conditions require special processing dimensions, not simple reshape, but equivalent transposes - shared_ptr permute1(new hiai::op::Permute(opName)); - (*permute1) - .set_input_x(*xOp.get()) - .set_attr_order(ge::AttrValue::LIST_INT({2,1,0,3})); - mNpuBackend->setOutputOps(mOp, {permute1}, outputs); - } else { - shared_ptr convertTensor(new hiai::op::Reshape(opName)); - - int index = mOp->inputIndexes()->data()[0]; - auto iter = mNpuBackend->mSclipMap.find(index); - if(iter != mNpuBackend->mSclipMap.end()){ - (*convertTensor).SetInput(0, *xOp, mNpuBackend->mSclipMap[index]); - (*convertTensor).set_input_shape(shapeConst); - }else{ - (*convertTensor).set_input_x(*xOp).set_input_shape(shapeConst); + int32_t dimIndex = -1; + bool flag = true; + if (inputShape.size() != outShape.size()) { + std::cout<<"inputsize not equal outputs size" <setOutputOps(mOp, {convertTensor}, outputs); } + if (dimIndex == -1) { + std::cout<<"inputsize cannot tans output" < convertTensor(new hiai::op::Permute(opName)); + int index = mOp->inputIndexes()->data()[0]; + auto iter = mNpuBackend->mSclipMap.find(index); + if (iter != mNpuBackend->mSclipMap.end()){ + (*convertTensor).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[index])) + .set_attr_order(dims[dimIndex]); + } else { + (*convertTensor).set_input_x(*xOp).set_attr_order(dims[dimIndex]); + } + mNpuBackend->setOutputOps(mOp, {convertTensor}, outputs); return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUConvertTensor.hpp b/source/backend/hiai/execution/NPUConvertTensor.hpp index b31eec65e..9d7f1b3a0 100644 --- a/source/backend/hiai/execution/NPUConvertTensor.hpp +++ b/source/backend/hiai/execution/NPUConvertTensor.hpp @@ -19,8 +19,6 @@ class NPUConvertTensor : public NPUCommonExecution { NPUConvertTensor(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); ErrorCode onResize(const std::vector &inputs, 
const std::vector &outputs); virtual ~NPUConvertTensor() = default; -private: - hiai::op::Const shapeConst; }; } // namespace MNN diff --git a/source/backend/hiai/execution/NPUConvolution.cpp b/source/backend/hiai/execution/NPUConvolution.cpp index dbafa6870..40832368f 100644 --- a/source/backend/hiai/execution/NPUConvolution.cpp +++ b/source/backend/hiai/execution/NPUConvolution.cpp @@ -21,7 +21,7 @@ NPUConvolution::NPUConvolution(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); - + auto xOp = mNpuBackend->getInputOps(mOp); auto opName = mOp->name()->str(); auto conv2D = mOp->main_as_Convolution2D(); @@ -30,6 +30,19 @@ ErrorCode NPUConvolution::onResize(const std::vector &inputs, const st auto kernelX = conv2DCommon->kernelX(); auto kernelY = conv2DCommon->kernelY(); auto outputCount = conv2DCommon->outputCount(); + std::vector pads; + if (conv2DCommon->pads() != nullptr) { + int32_t size = conv2DCommon->pads()->size() / 2; + for (int32_t i = 0; i < size; i++) { + pads.push_back(static_cast(conv2DCommon->pads()->data()[i])); + pads.push_back(static_cast(conv2DCommon->pads()->data()[i+size])); + } + } else { + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padX())); + pads.push_back(static_cast(conv2DCommon->padX())); + } int weightSize = 0; const float *filterDataPtr = nullptr; @@ -48,38 +61,51 @@ ErrorCode NPUConvolution::onResize(const std::vector &inputs, const st weightSize = quanCommon->weightFloat.size(); } - if (nullptr == filterDataPtr) { - weightSize = conv2D->weight()->size(); - filterDataPtr = conv2D->weight()->data(); - } - - int inputCount = weightSize / (kernelX * kernelY * outputCount); - shared_ptr conv(new hiai::op::Convolution(opName)); - - auto xOp = mNpuBackend->getInputOps(mOp); - - // om input weight const op mConst_w = hiai::op::Const(opName + "_w_const"); - { - ge::TensorDesc fdesc(ge::Shape({outputCount, inputCount, kernelY, kernelX}), ge::FORMAT_NCHW, - ge::DT_FLOAT); // in o h w ? 
- ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)filterDataPtr, weightSize * sizeof(float)); - - mConst_w.set_attr_value(filter); - } - - // om input bias const op mConst_b = hiai::op::Const(opName + "_b_const"); - { - ge::TensorDesc fdesc(ge::Shape({1, outputCount, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)conv2D->bias()->data(), conv2D->bias()->size() * sizeof(float)); - - mConst_b.set_attr_value(filter); + if (inputs.size() == 3 && conv2D->weight() == nullptr) { + bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + bool isConst2 = TensorUtils::getDescribe(inputs[2])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + if (isConst1 && isConst2) { + { + weightSize = inputs[1]->elementSize(); + int inputCount = weightSize / (kernelX * kernelY * outputCount); + ge::TensorDesc fdesc(ge::Shape({outputCount, inputCount, kernelY, kernelX}), ge::DT_FLOAT); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)inputs[1]->host(), weightSize * sizeof(float)); + mConst_w.set_attr_value(filter); + } + { + weightSize = inputs[2]->elementSize(); + ge::TensorDesc fdesc(ge::Shape({1, outputCount, 1, 1}), ge::DT_FLOAT); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)inputs[2]->host(), weightSize * sizeof(float)); + mConst_b.set_attr_value(filter); + } + } + } else { + if (filterDataPtr == nullptr) { + weightSize = conv2D->weight()->size(); + filterDataPtr = conv2D->weight()->data(); + } + int inputCount = weightSize / (kernelX * kernelY * outputCount); + { + ge::TensorDesc fdesc(ge::Shape({outputCount, inputCount, kernelY, kernelX}), ge::FORMAT_NCHW, ge::DT_FLOAT); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)filterDataPtr, weightSize * sizeof(float)); + mConst_w.set_attr_value(filter); + } + { + ge::TensorDesc fdesc(ge::Shape({1, outputCount, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)conv2D->bias()->data(), conv2D->bias()->size() * sizeof(float)); + mConst_b.set_attr_value(filter); + } } auto padMode = "SPECIFIC"; // NOTSET @@ -88,16 +114,21 @@ ErrorCode NPUConvolution::onResize(const std::vector &inputs, const st } else if (PadMode_SAME == conv2DCommon->padMode()) { padMode = "SAME"; } - + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*conv).set_input_x(*xOp.get()); + } else { + (*conv).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } (*conv) - .set_input_x(*xOp.get()) .set_input_filter(mConst_w) .set_input_bias(mConst_b) .set_attr_strides(ge::AttrValue::LIST_INT({conv2DCommon->strideY(), conv2DCommon->strideX()})) .set_attr_dilations(ge::AttrValue::LIST_INT({conv2DCommon->dilateY(), conv2DCommon->dilateX()})) .set_attr_groups(conv2DCommon->group()) - .set_attr_pads(ge::AttrValue::LIST_INT( - {conv2DCommon->padY(), conv2DCommon->padY(), conv2DCommon->padX(), conv2DCommon->padX()})) // 上下左右 + .set_attr_pads(pads) // 上下左右 .set_attr_pad_mode(padMode); shared_ptr relu_conv(new hiai::op::Activation(opName + "_Relu")); diff --git 
a/source/backend/hiai/execution/NPUConvolutionDepthwise.cpp b/source/backend/hiai/execution/NPUConvolutionDepthwise.cpp index b7adde852..059165ef0 100644 --- a/source/backend/hiai/execution/NPUConvolutionDepthwise.cpp +++ b/source/backend/hiai/execution/NPUConvolutionDepthwise.cpp @@ -34,6 +34,19 @@ ErrorCode NPUConvolutionDepthwise::onResize(const std::vector &inputs, int weightSize = 0; const float *filterDataPtr = nullptr; + std::vector pads; + if (conv2DCommon->pads() != nullptr) { + int32_t size = conv2DCommon->pads()->size() / 2; + for (int32_t i = 0; i < size; i++) { + pads.push_back(static_cast(conv2DCommon->pads()->data()[i])); + pads.push_back(static_cast(conv2DCommon->pads()->data()[i+size])); + } + } else { + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padX())); + pads.push_back(static_cast(conv2DCommon->padX())); + } std::shared_ptr quanCommon; if (nullptr != conv2D->quanParameter()) { quanCommon = ConvolutionCommon::load(conv2D, backend(), true); @@ -87,15 +100,20 @@ ErrorCode NPUConvolutionDepthwise::onResize(const std::vector &inputs, } else if (PadMode_SAME == conv2DCommon->padMode()) { padMode = "SAME"; } - + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*conv).set_input_x(*xOp.get()); + } else { + (*conv).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } (*conv) - .set_input_x(*(xOp.get())) .set_input_filter(mConst_w) .set_input_bias(mConst_b) .set_attr_strides(ge::AttrValue::LIST_INT({conv2DCommon->strideY(), conv2DCommon->strideX()})) .set_attr_dilations(ge::AttrValue::LIST_INT({conv2DCommon->dilateY(), conv2DCommon->dilateX()})) - .set_attr_pads(ge::AttrValue::LIST_INT( - {conv2DCommon->padY(), conv2DCommon->padY(), conv2DCommon->padX(), conv2DCommon->padX()})) // 上下左右 + .set_attr_pads(pads) // 上下左右 .set_attr_pad_mode(padMode); shared_ptr relu_conv(new hiai::op::Activation(opName + "_Relu")); diff --git a/source/backend/hiai/execution/NPUCrop.cpp b/source/backend/hiai/execution/NPUCrop.cpp new file mode 100644 index 000000000..86864d883 --- /dev/null +++ b/source/backend/hiai/execution/NPUCrop.cpp @@ -0,0 +1,49 @@ +// +// NPUCrop.cpp +// MNN +// +// Created by MNN on 2019/09/11. 
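The convolution-related changes above all derive the pads attribute the same way: use the explicit pads() array when present ({begin, end} per spatial dimension), otherwise fall back to padY/padX. A minimal sketch of that derivation, factored into a hypothetical buildPads helper for illustration:

// Sketch (not part of the patch): build the {top, bottom, left, right} pads
// attribute from Convolution2DCommon, as repeated in the Convolution,
// ConvolutionDepthwise and Deconvolution changes.
static std::vector<int64_t> buildPads(const MNN::Convolution2DCommon* common) {
    std::vector<int64_t> pads;
    if (common->pads() != nullptr) {
        int32_t size = common->pads()->size() / 2;
        for (int32_t i = 0; i < size; i++) {
            pads.push_back(static_cast<int64_t>(common->pads()->data()[i]));        // begin pad of dim i
            pads.push_back(static_cast<int64_t>(common->pads()->data()[i + size])); // end pad of dim i
        }
    } else {
        pads.push_back(static_cast<int64_t>(common->padY()));
        pads.push_back(static_cast<int64_t>(common->padY()));
        pads.push_back(static_cast<int64_t>(common->padX()));
        pads.push_back(static_cast<int64_t>(common->padX()));
    }
    return pads; // handed to set_attr_pads(...)
}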
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPUCrop.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPUCrop::NPUCrop(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs) : MNN::NPUCommonExecution(b,op) { +} + +ErrorCode NPUCrop::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + auto opName = mOp->name()->str(); + shared_ptr crop(new hiai::op::Crop(opName)); + auto xOp = mNpuBackend->getInputOps(mOp); + + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + xOp = iops.back().first; + auto inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; + auto xOp1 = iops1.back().first; + + auto param = mOp->main_as_Crop(); + int32_t axis = param->axis(); + auto offsetTmp = param->offset(); + vector offset; + for (int32_t i = 0; i < offsetTmp->size(); i++) { + offset.push_back(offsetTmp->Get(i)); + } + (*crop).set_input_x(*xOp.get()) + .set_input_size(*xOp1.get()) + .set_attr_axis(axis) + .set_attr_offsets(ge::AttrValue::LIST_INT(offset)); + mNpuBackend->setOutputOps(mOp, {crop}, outputs); + return NO_ERROR; +} + +NPUCreatorRegister> __CropD_op(OpType_Crop); + +} // namespace MNN diff --git a/source/backend/hiai/execution/NPUCrop.hpp b/source/backend/hiai/execution/NPUCrop.hpp new file mode 100644 index 000000000..11c17b135 --- /dev/null +++ b/source/backend/hiai/execution/NPUCrop.hpp @@ -0,0 +1,25 @@ +// +// NPUCrop.hpp +// MNN +// +// Created by MNN on 2019/09/11. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef MNN_NPUCrop_HPP +#define MNN_NPUCrop_HPP + +#include "NPUCommonExecution.hpp" + +namespace MNN { + +class NPUCrop : public NPUCommonExecution { +public: + NPUCrop(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUCrop() = default; +}; + +} // namespace MNN + +#endif // MNN_NPUCrop_HPP diff --git a/source/backend/hiai/execution/NPUDeconvolution.cpp b/source/backend/hiai/execution/NPUDeconvolution.cpp index d3c830aa2..5be7a664f 100644 --- a/source/backend/hiai/execution/NPUDeconvolution.cpp +++ b/source/backend/hiai/execution/NPUDeconvolution.cpp @@ -30,6 +30,19 @@ ErrorCode NPUDeconvolution::onResize(const std::vector &inputs, const auto kernelY = conv2DCommon->kernelY(); auto outputCount = conv2DCommon->outputCount(); + std::vector pads; + if (conv2DCommon->pads() != nullptr) { + int32_t size = conv2DCommon->pads()->size() / 2; + for (int32_t i = 0; i < size; i++) { + pads.push_back(static_cast(conv2DCommon->pads()->data()[i])); + pads.push_back(static_cast(conv2DCommon->pads()->data()[i+size])); + } + } else { + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padX())); + pads.push_back(static_cast(conv2DCommon->padX())); + } int weightSize = 0; const float *filterDataPtr = nullptr; @@ -72,17 +85,30 @@ ErrorCode NPUDeconvolution::onResize(const std::vector &inputs, const } else if (PadMode_SAME == conv2DCommon->padMode()) { padMode = "SAME"; } - + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*deconv).set_input_x(*xOp.get()); + } else { + 
(*deconv).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } (*deconv) - .set_input_x(*xOp.get()) .set_input_filter(mConst_w) .set_input_bias(mConst_b) .set_attr_strides(ge::AttrValue::LIST_INT({conv2DCommon->strideY(), conv2DCommon->strideX()})) .set_attr_dilations(ge::AttrValue::LIST_INT({conv2DCommon->dilateY(), conv2DCommon->dilateX()})) .set_attr_groups(conv2DCommon->group()) - .set_attr_pads(ge::AttrValue::LIST_INT( - {conv2DCommon->padY(), conv2DCommon->padY(), conv2DCommon->padX(), conv2DCommon->padX()})) // 上下左右 + .set_attr_pads(pads) // 上下左右 .set_attr_pad_mode(padMode); + vector outputpads; + if (conv2DCommon->outPads() != nullptr) { + int32_t size = conv2DCommon->outPads()->size(); + for (int32_t i = 0; i < size; i++) { + outputpads.push_back(static_cast(conv2DCommon->outPads()->data()[i])); + } + (*deconv).SetAttr("output_padding", ge::AttrValue::CreateFrom(outputpads)); + } shared_ptr relu_conv(new hiai::op::Activation(opName + "_Relu")); mRelu_conv = relu_conv; diff --git a/source/backend/hiai/execution/NPUDeconvolutionDepthwise.cpp b/source/backend/hiai/execution/NPUDeconvolutionDepthwise.cpp new file mode 100644 index 000000000..84950a575 --- /dev/null +++ b/source/backend/hiai/execution/NPUDeconvolutionDepthwise.cpp @@ -0,0 +1,135 @@ +// +// NPUDeconvolutionDepthwise.cpp +// MNN +// +// Created by MNN on 2019/09/11. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPUDeconvolutionDepthwise.hpp" +#include "NPUBackend.hpp" +#include +#include "core/ConvolutionCommon.hpp" + +using namespace std; + +namespace MNN { + +NPUDeconvolutionDepthwise::NPUDeconvolutionDepthwise(Backend *b, const Op *op, const std::vector &inputs, + const std::vector &outputs) + : MNN::NPUCommonExecution(b,op) {} + +ErrorCode NPUDeconvolutionDepthwise::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + auto opName = mOp->name()->str(); + + auto conv2D = mOp->main_as_Convolution2D(); + auto conv2DCommon = conv2D->common(); + + auto kernelX = conv2DCommon->kernelX(); + auto kernelY = conv2DCommon->kernelY(); + auto outputCount = conv2DCommon->outputCount(); + + std::vector pads; + if (conv2DCommon->pads() != nullptr) { + int32_t size = conv2DCommon->pads()->size() / 2; + for (int32_t i = 0; i < size; i++) { + pads.push_back(static_cast(conv2DCommon->pads()->data()[i])); + pads.push_back(static_cast(conv2DCommon->pads()->data()[i+size])); + } + } else { + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padY())); + pads.push_back(static_cast(conv2DCommon->padX())); + pads.push_back(static_cast(conv2DCommon->padX())); + } + int weightSize = 0; + const float *filterDataPtr = nullptr; + + if (nullptr == filterDataPtr) { + weightSize = conv2D->weight()->size(); + filterDataPtr = conv2D->weight()->data(); + } + + int inputCount = weightSize / (kernelX * kernelY * outputCount); + + shared_ptr deconv(new hiai::op::ConvTranspose(opName)); + + auto xOp = mNpuBackend->getInputOps(mOp); + // om input weight const op + mConst_w = hiai::op::Const(opName + "_w_const"); + { + ge::TensorDesc fdesc(ge::Shape({outputCount, inputCount, kernelY, kernelX}), ge::FORMAT_NCHW, + ge::DT_FLOAT); // in o h w ? 
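// The pads vector assembled earlier in this function pairs pads()->data()[i] with
// pads()->data()[i + size]; assuming pads() stores begin values followed by end values
// per spatial axis, this yields the same {top, bottom, left, right} order as the
// padY/padX fallback branch.
// The filter const below copies the raw MNN weight blob unchanged into the declared
// NCHW shape {outputCount, inputCount, kernelY, kernelX}; the original "in o h w ?"
// comment above already flags this layout as unverified.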
+ ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)filterDataPtr, weightSize * sizeof(float)); + + mConst_w.set_attr_value(filter); + } + + // om input bias const op + mConst_b = hiai::op::Const(opName + "_b_const"); + { + ge::TensorDesc fdesc(ge::Shape({1, outputCount, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)conv2D->bias()->data(), conv2D->bias()->size() * sizeof(float)); + + mConst_b.set_attr_value(filter); + } + + std::string padMode = "SPECIFIC"; // NOTSET + if (PadMode_VALID == conv2DCommon->padMode()) { + padMode = "VALID"; + } else if (PadMode_SAME == conv2DCommon->padMode()) { + padMode = "SAME"; + } + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*deconv).set_input_x(*xOp.get()); + } else { + (*deconv).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } + (*deconv) + .set_input_filter(mConst_w) + .set_input_bias(mConst_b) + .set_attr_strides(ge::AttrValue::LIST_INT({conv2DCommon->strideY(), conv2DCommon->strideX()})) + .set_attr_dilations(ge::AttrValue::LIST_INT({conv2DCommon->dilateY(), conv2DCommon->dilateX()})) + .set_attr_groups(conv2DCommon->group()) + .set_attr_pads(pads) // 上下左右 + .set_attr_pad_mode(padMode); + + vector outputpads; + if (conv2DCommon->outPads() != nullptr) { + int32_t size = conv2DCommon->outPads()->size(); + for (int32_t i = 0; i < size; i++) { + outputpads.push_back(static_cast(conv2DCommon->outPads()->data()[i])); + } + (*deconv).SetAttr("output_padding", ge::AttrValue::CreateFrom(outputpads)); + } + + shared_ptr relu_conv(new hiai::op::Activation(opName + "_Relu")); + mRelu_conv = relu_conv; + + auto relu = conv2DCommon->relu(); + auto relu6 = conv2DCommon->relu6(); + if (relu || relu6) { + (*mRelu_conv) + .set_input_x(*deconv.get()) + .set_attr_mode(relu?1:14); + } + + if (relu || relu6) { + mNpuBackend->setOutputOps(mOp, {deconv, mRelu_conv}, outputs); + }else{ + mNpuBackend->setOutputOps(mOp, {deconv}, outputs); + } + return NO_ERROR; +} + +NPUCreatorRegister> __depthwise_deconv_op(OpType_DeconvolutionDepthwise); + +} // namespace MNN diff --git a/source/backend/hiai/execution/NPUDeconvolutionDepthwise.hpp b/source/backend/hiai/execution/NPUDeconvolutionDepthwise.hpp new file mode 100644 index 000000000..58e24d7fd --- /dev/null +++ b/source/backend/hiai/execution/NPUDeconvolutionDepthwise.hpp @@ -0,0 +1,32 @@ +// +// NPUDeconvolutionDepthwise.hpp +// MNN +// +// Created by MNN on 2019/09/11. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef MNN_NPUDeconvolutionDepthwise_HPP +#define MNN_NPUDeconvolutionDepthwise_HPP + +#include "NPUCommonExecution.hpp" +#include "NPUBackend.hpp" + +namespace MNN { + +class NPUDeconvolutionDepthwise : public NPUCommonExecution { +public: + NPUDeconvolutionDepthwise(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUDeconvolutionDepthwise() = default; + +private: + hiai::op::Const mConst_w; + hiai::op::Const mConst_b; + + shared_ptr mRelu_conv; +}; + +} // namespace MNN + +#endif // MNN_NPUDeconvolutionDepthwise_HPP diff --git a/source/backend/hiai/execution/NPUDepthToSpace.cpp b/source/backend/hiai/execution/NPUDepthToSpace.cpp index 51468f1db..dcff93f28 100644 --- a/source/backend/hiai/execution/NPUDepthToSpace.cpp +++ b/source/backend/hiai/execution/NPUDepthToSpace.cpp @@ -24,20 +24,15 @@ ErrorCode NPUDepthToSpace::onResize(const std::vector &inputs, const s shared_ptr depthToSpace(new hiai::op::DepthToSpace(opName)); shared_ptr permuteBefore(new hiai::op::Permute(opName+"_before")); shared_ptr permuteAfter(new hiai::op::Permute(opName+"_after")); - - /* - * set om op - * */ - - // - auto inputIndex1 = mOp->inputIndexes()->data()[0]; - auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x - auto xOp1 = iops1.back().first; + auto xOp = mNpuBackend->getInputOps(mOp); + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; auto param = mOp->main_as_DepthSpaceParam(); (*permuteBefore) - .set_input_x(*xOp1.get()) + .set_input_x(*xOp.get()) .set_attr_order({0,2,3,1}) .SetAttr("NCHW_to_NHWC", ge::AttrValue::CreateFrom(static_cast(1))); diff --git a/source/backend/hiai/execution/NPUEltwise.cpp b/source/backend/hiai/execution/NPUEltwise.cpp index 0ad78b88e..85f56f3ee 100644 --- a/source/backend/hiai/execution/NPUEltwise.cpp +++ b/source/backend/hiai/execution/NPUEltwise.cpp @@ -21,42 +21,72 @@ ErrorCode NPUEltwise::onResize(const std::vector &inputs, const std::v mNpuBackend->setNetworkInput(inputs, mOp); auto opName = mOp->name()->str(); - shared_ptr eltwise(new hiai::op::Eltwise(opName)); - - /* - * set om op - * */ - auto inputIndex1 = mOp->inputIndexes()->data()[0]; - auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x - auto xOp1 = iops1.back().first; - - - auto inputIndex2 = mOp->inputIndexes()->data()[1]; - auto iops2 = mNpuBackend->mGrapMap[inputIndex2]; // x - auto xOp2 = iops2.back().first; - - auto inputSize = mOp->inputIndexes()->size(); auto param = mOp->main_as_Eltwise(); - (*eltwise).create_dynamic_input_x(inputSize) - .set_attr_N(inputSize) - .set_attr_mode(param->type()); // 0:product,1:sum,2:max;default is CC_ELTWISE_SUM. 
TODO SUB Weight - - for (int i = 0; i < inputSize; ++i) { - auto inputIndex = mOp->inputIndexes()->data()[i]; - auto iops = mNpuBackend->mGrapMap[inputIndex]; // x - auto xOp = iops.back().first; - - hiai::Operator *px = (hiai::Operator *)xOp.get(); - (* eltwise).set_dynamic_input_x(i + 1, *px); - } - - if(param->type()==EltwiseType_SUB) { + auto coffs = param->coeff(); + if (param->type() == EltwiseType_SUM && coffs == nullptr) { + auto inputIndex1 = mOp->inputIndexes()->data()[0]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x + auto xOp1 = iops1.back().first; + auto inputIndex2 = mOp->inputIndexes()->data()[1]; + auto iops2 = mNpuBackend->mGrapMap[inputIndex2]; // x + auto xOp2 = iops2.back().first; + shared_ptr sub(new hiai::op::Add(opName)); + if (mNpuBackend->mSclipMap.find(inputIndex1) == mNpuBackend->mSclipMap.end()) { + (*sub).set_input_x1(*xOp1.get()); + } else { + (*sub).set_input_x1(xOp1->GetOutput(mNpuBackend->mSclipMap[inputIndex1])); + } + if (mNpuBackend->mSclipMap.find(inputIndex2) == mNpuBackend->mSclipMap.end()) { + (*sub).set_input_x2(*xOp2.get()); + } else { + (*sub).set_input_x2(xOp2->GetOutput(mNpuBackend->mSclipMap[inputIndex2])); + } + mNpuBackend->setOutputOps(mOp, {sub}, outputs); + } else if (param->type() == EltwiseType_SUB && coffs == nullptr) { + auto inputIndex1 = mOp->inputIndexes()->data()[0]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x + auto xOp1 = iops1.back().first; + auto inputIndex2 = mOp->inputIndexes()->data()[1]; + auto iops2 = mNpuBackend->mGrapMap[inputIndex2]; // x + auto xOp2 = iops2.back().first; shared_ptr sub(new hiai::op::Sub(opName)); - (*sub) - .set_input_x1(*xOp1.get()) - .set_input_x2(*xOp2.get()); + if (mNpuBackend->mSclipMap.find(inputIndex1) == mNpuBackend->mSclipMap.end()) { + (*sub).set_input_x1(*xOp1.get()); + } else { + (*sub).set_input_x1(xOp1->GetOutput(mNpuBackend->mSclipMap[inputIndex1])); + } + if (mNpuBackend->mSclipMap.find(inputIndex2) == mNpuBackend->mSclipMap.end()) { + (*sub).set_input_x2(*xOp2.get()); + } else { + (*sub).set_input_x2(xOp2->GetOutput(mNpuBackend->mSclipMap[inputIndex2])); + } mNpuBackend->setOutputOps(mOp, {sub}, outputs); } else { + vector coffAttr; + if (coffs != nullptr) { + for (int32_t j = 0; j < coffs->size(); j++) { + coffAttr.push_back(coffs->Get(j)); + } + } + auto inputSize = mOp->inputIndexes()->size(); + shared_ptr eltwise(new hiai::op::Eltwise(opName)); + (*eltwise) + .create_dynamic_input_x(inputSize) + .set_attr_N(inputSize) + .set_attr_mode(param->type()); + for (int i = 0; i < inputSize; ++i) { + auto inputIndex = mOp->inputIndexes()->data()[i]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + auto xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*eltwise).set_dynamic_input_x(i + 1, *xOp.get()); + } else { + (*eltwise).set_dynamic_input_x(i + 1, xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } + } + if (coffAttr.size() > 0) { + (*eltwise).set_attr_coeff(coffAttr); + } mNpuBackend->setOutputOps(mOp, {eltwise}, outputs); } return NO_ERROR; diff --git a/source/backend/hiai/execution/NPUExpandDims.cpp b/source/backend/hiai/execution/NPUExpandDims.cpp index 1b6c3b4ef..86b689811 100644 --- a/source/backend/hiai/execution/NPUExpandDims.cpp +++ b/source/backend/hiai/execution/NPUExpandDims.cpp @@ -21,25 +21,27 @@ ErrorCode NPUExpandDims::onResize(const std::vector &inputs, const std auto opName = mOp->name()->str(); auto xOp = mNpuBackend->getInputOps(mOp); - auto shapeFormat = 
tensorShapeFormat(outputs[0]); - std::vector shapeDims(shapeFormat.begin(), shapeFormat.end()); + + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + + auto param = mOp->main_as_ExpandDims(); + vector axs = {param->axis()}; shapeConst = hiai::op::Const(opName + "_shape_const"); - { - ge::TensorDesc fdesc(ge::Shape({static_cast(shapeDims.size())}), - ge::FORMAT_NCHW, ge::DT_INT32); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)shapeDims.data(), shapeDims.size() * sizeof(int32_t)); - - shapeConst.set_attr_value(filter); + ge::TensorDesc fdesc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)axs.data(), sizeof(int32_t)); + shapeConst.set_attr_value(filter); + + shared_ptr prob(new hiai::op::ExpandDims(opName)); + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*prob).set_input_x(*xOp.get()); + } else { + (*prob).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); } - - shared_ptr prob(new hiai::op::Reshape(opName)); - - auto output = outputs[0]; - - (*prob).set_input_x(*xOp.get()).set_input_shape(shapeConst); - + (*prob).set_input_axis(shapeConst); mNpuBackend->setOutputOps(mOp, {prob}, outputs); return NO_ERROR; diff --git a/source/backend/hiai/execution/NPUFlatten.cpp b/source/backend/hiai/execution/NPUFlatten.cpp new file mode 100644 index 000000000..3e91bb888 --- /dev/null +++ b/source/backend/hiai/execution/NPUFlatten.cpp @@ -0,0 +1,36 @@ +// +// NPUFlatten.cpp +// MNN +// +// Created by MNN on b'2020/10/15'. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPUFlatten.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPUFlatten::NPUFlatten(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) { +} + +ErrorCode NPUFlatten::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + + auto opName = mOp->name()->str(); + auto xOp = mNpuBackend->getInputOps(mOp); + + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + shared_ptr flatten(new hiai::op::Flatten(opName)); + (*flatten).set_input_x(*xOp.get()); + mNpuBackend->setOutputOps(mOp, {flatten}, outputs); + return NO_ERROR; +} + +NPUCreatorRegister> __flatten_op(OpType_Flatten); + +} // namespace MNN \ No newline at end of file diff --git a/source/backend/hiai/execution/NPUFlatten.hpp b/source/backend/hiai/execution/NPUFlatten.hpp new file mode 100644 index 000000000..1ad632eae --- /dev/null +++ b/source/backend/hiai/execution/NPUFlatten.hpp @@ -0,0 +1,25 @@ +// +// NPUFlatten.hpp +// MNN +// +// Created by MNN on b'2020/10/15'. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef NPUDEMO_NPUFlatten_HPP +#define NPUDEMO_NPUFlatten_HPP + +#include "NPUCommonExecution.hpp" +#include "NPUBackend.hpp" + +namespace MNN { + +class NPUFlatten : public NPUCommonExecution { +public: + NPUFlatten(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUFlatten() = default; +}; +} // namespace MNN + +#endif // NPUDEMO_NPUFlatten_HPP diff --git a/source/backend/hiai/execution/NPUGatherV2.cpp b/source/backend/hiai/execution/NPUGatherV2.cpp index 09ec6df72..0a19219e9 100644 --- a/source/backend/hiai/execution/NPUGatherV2.cpp +++ b/source/backend/hiai/execution/NPUGatherV2.cpp @@ -19,29 +19,37 @@ NPUGatherV2::NPUGatherV2(Backend *b, const Op *op, const std::vector & bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; - if(!isConst0 && isConst1){ - auto input1 = inputs[1]; + if (isConst0 && !isConst1) { + auto input = inputs[0]; // om input weight const op - mConst = hiai::op::Const(opName + "_w_const"); - { - ge::TensorDesc fdesc(ge::Shape({input1->batch(), input1->channel(), input1->height(), input1->width()}), ge::FORMAT_NCHW, ge::DT_FLOAT); // in o h w ? - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)input1->host(), input1->elementSize() * sizeof(float)); - mConst.set_attr_value(filter); + mConst = hiai::op::Const(opName + "_x_const"); + vector dims; + for (int32_t i = 0; i < input->buffer().dimensions; i++) { + dims.push_back(input->buffer().dim[i].extent); } - - }else if(isConst0 && !isConst1){ - auto input0 = inputs[0]; + ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT); // in o h w ? + ge::TensorPtr filter = std::make_shared(); + if (input->getType().code == halide_type_int && input->getType().bits == 32) { + fdesc.SetDataType(ge::DT_INT32); + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(int32_t)); + } else { + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(float)); + } + filter->SetTensorDesc(fdesc); + mConst.set_attr_value(filter); + } else if (!isConst0 && isConst1) { + auto input = inputs[1]; // om input weight const op - mConst = hiai::op::Const(opName + "_w_const"); - { - ge::TensorDesc fdesc(ge::Shape({input0->batch(), input0->channel(), input0->height(), input0->width()}), ge::FORMAT_NCHW, ge::DT_FLOAT); // in o h w ? - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)input0->host(), input0->elementSize() * sizeof(float)); - mConst.set_attr_value(filter); - } + vector dims; + for (int32_t i = 0; i < input->buffer().dimensions; i++) { + dims.push_back(input->buffer().dim[i].extent); + } + mConst = hiai::op::Const(opName + "_i_const"); + ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_INT32); // in o h w ? 
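// When the indices tensor is a compile-time constant it is materialized here as a
// DT_INT32 const; the dynamic-indices paths in onResize below obtain the same effect by
// routing the indices through a CastT op with dst_dtype DT_INT32 before GatherV2D.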
+ ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(int32_t)); + mConst.set_attr_value(filter); } } @@ -55,57 +63,54 @@ ErrorCode NPUGatherV2::onResize(const std::vector &inputs, const std:: auto param = mOp->main_as_GatherV2(); shared_ptr prob(new hiai::op::GatherV2D(opName)); - vector, string>> ops; - + shared_ptr castOp(new hiai::op::CastT(opName + "_cast")); bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + bool isConst2 = TensorUtils::getDescribe(inputs[2])->usage==Tensor::InsideDescribe::Usage::CONSTANT; int axis = 0; - if (inputs.size() == 3) { + if (isConst2 && inputs.size() == 3) { const Tensor *axisTensor = inputs[2]; axis = axisTensor->host()[0]; } - if (axis < 0) { axis = params->buffer().dimensions + axis; } - - if(!isConst0 && isConst1){ - // + auto xOp = mNpuBackend->getInputOps(mOp); + if (!isConst0 && isConst1) { auto inputIndex0 = mOp->inputIndexes()->data()[0]; auto iops0 = mNpuBackend->mGrapMap[inputIndex0]; // x auto xOp0 = iops0.back().first; (*prob) - .set_input_indices(*xOp0.get()) - .set_input_x(mConst) + .set_input_x(*xOp0.get()) + .set_input_indices(mConst) .set_attr_axis(axis); - }else if(isConst0 && !isConst1){ - // + mNpuBackend->setOutputOps(mOp, {prob}, outputs); + } else if (isConst0 && !isConst1){ auto inputIndex1 = mOp->inputIndexes()->data()[1]; auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x auto xOp1 = iops1.back().first; - + (*castOp).set_input_x(*xOp1.get()).set_attr_dst_dtype(ge::DataType::DT_INT32); (*prob) - .set_input_indices(mConst) - .set_input_x(*xOp1.get()) - .set_attr_axis(axis); - }else{ + .set_input_x(mConst) + .set_input_indices(*castOp.get()) + .set_attr_axis(axis); + mNpuBackend->setOutputOps(mOp, {castOp, prob}, outputs); + } else { auto inputIndex = mOp->inputIndexes()->data()[0]; auto iops = mNpuBackend->mGrapMap[inputIndex]; // x - auto xOp = iops.back().first; + xOp = iops.back().first; auto inputIndex1 = mOp->inputIndexes()->data()[1]; auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; // x auto xOp1 = iops1.back().first; - + (*castOp).set_input_x(*xOp1.get()).set_attr_dst_dtype(ge::DataType::DT_INT32); (*prob) - .set_input_indices(*xOp.get()) - .set_input_x(*xOp1.get()) + .set_input_x(*xOp.get()) + .set_input_indices(*castOp.get()) .set_attr_axis(axis); + mNpuBackend->setOutputOps(mOp, {castOp, prob}, outputs); } - - mNpuBackend->setOutputOps(mOp, {prob}, outputs); - return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUInterp.cpp b/source/backend/hiai/execution/NPUInterp.cpp index 0d7b5ac2c..722aef7b1 100644 --- a/source/backend/hiai/execution/NPUInterp.cpp +++ b/source/backend/hiai/execution/NPUInterp.cpp @@ -23,7 +23,10 @@ ErrorCode NPUInterp::onResize(const std::vector &inputs, const std::ve auto xOp = mNpuBackend->getInputOps(mOp); auto resizeType = param->resizeType(); MNN_ASSERT(resizeType <= 3); - + if (resizeType > 3) { + MNN_ERROR("npu Interp not support type: %d", resizeType); + return NOT_SUPPORT; + } vector hw = {outputs[0]->height(),outputs[0]->width()}; mConstShape = hiai::op::Const(opName + "_w_const"); { @@ -34,19 +37,19 @@ ErrorCode NPUInterp::onResize(const std::vector &inputs, const std::ve mConstShape.set_attr_value(filter); } - if(resizeType == 1) { + if (resizeType == 1) { shared_ptr interp(new hiai::op::ResizeNearestNeighbor(opName)); 
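// resizeType 1 selects ResizeNearestNeighbor, while types 2 and 3 both select
// ResizeBilinear below; every branch consumes the mConstShape const built above from
// the output {height, width}.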
(*interp).set_input_x(*xOp) .set_input_size(mConstShape) .set_attr_align_corners(param->alignCorners()); mNpuBackend->setOutputOps(mOp, {interp}, outputs); - } else if(resizeType == 2) { + } else if (resizeType == 2) { shared_ptr interp(new hiai::op::ResizeBilinear(opName)); (*interp).set_input_x(*xOp) .set_input_size(mConstShape) .set_attr_align_corners(param->alignCorners()); mNpuBackend->setOutputOps(mOp, {interp}, outputs); - } else if(resizeType == 3) { + } else if (resizeType == 3) { shared_ptr interp(new hiai::op::ResizeBilinear(opName)); (*interp).set_input_x(*xOp) .set_input_size(mConstShape) diff --git a/source/backend/hiai/execution/NPULRN.cpp b/source/backend/hiai/execution/NPULRN.cpp new file mode 100644 index 000000000..cdfd54c33 --- /dev/null +++ b/source/backend/hiai/execution/NPULRN.cpp @@ -0,0 +1,50 @@ +// +// NPULRN.cpp +// MNN +// +// Created by MNN on b'2020/10/15'. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPULRN.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPULRN::NPULRN(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) {} + +ErrorCode NPULRN::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + auto opName = mOp->name()->str(); + shared_ptr lrn(new hiai::op::LRN(opName)); + auto param = mOp->main_as_LRN(); + int32_t depth_radius = param->localSize(); + float bias = param->bias(); + float alpha = param->alpha(); + float beta = param->beta(); + int32_t normRegion = param->regionType(); + string normRegionName = "ACROSS_CHANNELS"; + if (normRegion == 1) { + normRegionName = "WITHIN_CHANNEL"; + } + auto xOp = mNpuBackend->getInputOps(mOp); + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + (*lrn) + .set_input_x(*xOp.get()) + .set_attr_depth_radius(depth_radius) + .set_attr_bias(bias) + .set_attr_alpha(alpha) + .set_attr_beta(beta) + .set_attr_norm_region(normRegionName); + + mNpuBackend->setOutputOps(mOp, {lrn}, outputs); + return NO_ERROR; +} + +NPUCreatorRegister> __LRN_op(OpType_LRN); + +} // namespace MNN \ No newline at end of file diff --git a/source/backend/hiai/execution/NPULRN.hpp b/source/backend/hiai/execution/NPULRN.hpp new file mode 100644 index 000000000..ca64367b8 --- /dev/null +++ b/source/backend/hiai/execution/NPULRN.hpp @@ -0,0 +1,25 @@ +// +// NPULRN.hpp +// MNN +// +// Created by MNN on b'2020/10/15'. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef NPUDEMO_NPULRN_HPP +#define NPUDEMO_NPULRN_HPP + +#include "NPUCommonExecution.hpp" +#include "NPUBackend.hpp" + +namespace MNN { + +class NPULRN : public NPUCommonExecution { +public: + NPULRN(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPULRN() = default; +}; +} // namespace MNN + +#endif // NPUDEMO_NPULRN_HPP diff --git a/source/backend/hiai/execution/NPULayerNorm.cpp b/source/backend/hiai/execution/NPULayerNorm.cpp new file mode 100644 index 000000000..d0eb8ac90 --- /dev/null +++ b/source/backend/hiai/execution/NPULayerNorm.cpp @@ -0,0 +1,75 @@ +// +// NPULayerNorm.cpp +// MNN +// +// Created by MNN on b'2020/10/15'. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPULayerNorm.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPULayerNorm::NPULayerNorm(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) {} + +ErrorCode NPULayerNorm::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + auto opName = mOp->name()->str(); + auto param = mOp->main_as_LayerNorm(); + auto xOp = mNpuBackend->getInputOps(mOp); + shared_ptr layerNorm(new hiai::op::LayerNorm(opName)); + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + + constw = hiai::op::Const(opName + "_w_const"); + constb = hiai::op::Const(opName + "_b_const"); + if (param->gamma() == nullptr && param->beta() == nullptr) { + auto shape = inputs[0]->shape(); + int32_t size = shape[shape.size()-1]; + vector data(size, 1); + vector data1(size, 0); + vector shape1{static_cast(size)}; + ge::TensorDesc fdesc(ge::Shape(shape1), ge::FORMAT_NCHW, ge::DT_FLOAT); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)data.data(), size * sizeof(float)); + constw.set_attr_value(filter); + + ge::TensorDesc fdesc1(ge::Shape(shape1), ge::FORMAT_NCHW, ge::DT_FLOAT); + ge::TensorPtr filter1 = std::make_shared(); + filter1->SetTensorDesc(fdesc1); + filter1->SetData((uint8_t *)data1.data(), size * sizeof(float)); + constb.set_attr_value(filter1); + } else { + uint32_t size = param->gamma()->size(); + vector shape1{size}; + ge::TensorDesc fdesc(ge::Shape(shape1), ge::FORMAT_NCHW, ge::DT_FLOAT); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)param->gamma()->Data(), size * sizeof(float)); + constw.set_attr_value(filter); + + size = param->beta()->size(); + vector shape2{size}; + ge::TensorDesc fdesc1(ge::Shape(shape2), ge::FORMAT_NCHW, ge::DT_FLOAT); + ge::TensorPtr filter1 = std::make_shared(); + filter1->SetTensorDesc(fdesc1); + filter1->SetData((uint8_t *)param->beta()->Data(), size * sizeof(float)); + constb.set_attr_value(filter1); + } + float eps = param->epsilon(); + (*layerNorm).set_input_x(*xOp.get()) + .set_input_gamma(constw) + .set_input_beta(constb) + .set_attr_epsilon(eps); + mNpuBackend->setOutputOps(mOp, {layerNorm}, outputs); + return NO_ERROR; +} + +NPUCreatorRegister> __LayerNorm_op(OpType_LayerNorm); + +} // namespace MNN \ No newline at end of file diff --git a/source/backend/hiai/execution/NPULayerNorm.hpp b/source/backend/hiai/execution/NPULayerNorm.hpp new file mode 100644 index 000000000..e6e3510d4 --- /dev/null +++ b/source/backend/hiai/execution/NPULayerNorm.hpp @@ -0,0 +1,30 @@ +// +// NPULayerNorm.hpp +// MNN +// +// Created by MNN on b'2020/10/15'. 
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef NPUDEMO_NPULayerNorm_HPP +#define NPUDEMO_NPULayerNorm_HPP + +#include "NPUCommonExecution.hpp" +#include "NPUBackend.hpp" + +namespace MNN { + +class NPULayerNorm : public NPUCommonExecution { +public: + NPULayerNorm(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + + virtual ~NPULayerNorm() = default; + +private: + hiai::op::Const constw; + hiai::op::Const constb; +}; +} // namespace MNN + +#endif // NPUDEMO_NPULayerNorm_HPP diff --git a/source/backend/hiai/execution/NPUMatmul.cpp b/source/backend/hiai/execution/NPUMatmul.cpp index efaac5291..1e0722257 100644 --- a/source/backend/hiai/execution/NPUMatmul.cpp +++ b/source/backend/hiai/execution/NPUMatmul.cpp @@ -16,79 +16,110 @@ namespace MNN { NPUMatmul::NPUMatmul(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) { auto opName = mOp->name()->str(); + bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; - if(isConst1){ - auto input1 = inputs[1]; - // om input weight const op + Tensor* input = nullptr; + if (isConst0 && !isConst1){ + input = inputs[0]; + } + if (!isConst0 && isConst1){ + input = inputs[1]; + } + if (input != nullptr) { mConst = ge::op::Const(opName + "_w_const"); - { - ge::TensorPtr filter = std::make_shared(); - ge::TensorDesc fdesc(ge::Shape({inputs[1]->buffer().dim[0].extent, inputs[1]->buffer().dim[1].extent}), ge::FORMAT_NCHW, ge::DT_FLOAT); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)input1->host(), input1->elementSize() * sizeof(float)); - mConst.set_attr_value(filter); + ge::TensorPtr filter = std::make_shared(); + vector dims; + for (int32_t i = 0; i < input->buffer().dimensions; i++) { + dims.push_back(input->buffer().dim[i].extent); } + ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT); + if (input->getType().code == halide_type_int && input->getType().bits == 32) { + fdesc.SetDataType(ge::DT_INT32); + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(int32_t)); + } else { + filter->SetData((uint8_t *)input->host(), input->elementSize() * sizeof(float)); + } + filter->SetTensorDesc(fdesc); + mConst.set_attr_value(filter); } + } ErrorCode NPUMatmul::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); - auto opName = mOp->name()->str(); - - // - auto inputIndex1 = mOp->inputIndexes()->data()[0]; - auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; - auto xOp1 = iops1.back().first; - - shared_ptr reshape(new ge::op::Reshape(opName + "_reshape")); - (*reshape).set_input_tensor(*xOp1.get()).set_attr_shape(ge::AttrValue::LIST_INT({inputs[0]->buffer().dim[0].extent, inputs[0]->buffer().dim[1].extent})); - - vector, string>> ops; - auto param = mOp->main_as_MatMul(); - - shared_ptr matmul(new ge::op::MatMul(opName)); - + bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT; bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; - - if(isConst1){ - - (*matmul) - .set_input_x1(*reshape) - .set_input_x2(mConst) - .set_attr_transpose_x1(param->transposeA()) - .set_attr_transpose_x2(param->transposeB()); - - shared_ptr reshape3(new 
ge::op::Reshape(opName + "_reshape3")); - auto shape = tensorShapeFormat(outputs[0]); - (*reshape3).set_input_tensor(*matmul).set_attr_shape(ge::AttrValue::LIST_INT(shape)); - - mNpuBackend->setOutputOps(mOp, {reshape, matmul, reshape3}, outputs); - - }else{ -//hangxing todo - - auto inputIndex2 = mOp->inputIndexes()->data()[1]; - auto iops2 = mNpuBackend->mGrapMap[inputIndex2]; - auto xOp2 = iops2.back().first; - shared_ptr reshape2(new ge::op::Reshape(opName + "_reshape2")); - (*reshape2).set_input_tensor(*xOp2.get()).set_attr_shape(ge::AttrValue::LIST_INT({inputs[1]->buffer().dim[0].extent, inputs[1]->buffer().dim[1].extent})); - - (*matmul) - .set_input_x1(*reshape) - .set_input_x2(*reshape2) - .set_attr_transpose_x1(!param->transposeA()) - .set_attr_transpose_x2(param->transposeB()); - - shared_ptr permute(new ge::op::Permute(opName + "_permute")); - (*permute).set_input_x(*matmul).set_attr_order(ge::AttrValue::LIST_INT({1,0})); - - shared_ptr reshape3(new ge::op::Reshape(opName + "_reshape3")); - (*reshape3).set_input_tensor(*permute).set_attr_shape(ge::AttrValue::LIST_INT({1, outputs[0]->buffer().dim[1].extent, outputs[0]->buffer().dim[0].extent, 1})); - - mNpuBackend->setOutputOps(mOp, {reshape, reshape2, matmul, permute, reshape3}, outputs); - + auto param = mOp->main_as_MatMul(); + if (outputs[0]->buffer().dimensions == 4 || outputs[0]->buffer().dimensions == 3) { + shared_ptr matmul(new hiai::op::BatchMatMul(opName)); + if (isConst0 && !isConst1) { + auto inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; + auto xOp1 = iops1.back().first; + (*matmul) + .set_input_x1(mConst) + .set_input_x2(*xOp1.get()) + .set_attr_adj_x1(param->transposeA()) + .set_attr_adj_x2(param->transposeB()); + } else if (!isConst0 && isConst1) { + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + auto xOp = iops.back().first; + (*matmul) + .set_input_x1(*xOp.get()) + .set_input_x2(mConst) + .set_attr_adj_x1(param->transposeA()) + .set_attr_adj_x2(param->transposeB()); + } else { + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + auto xOp = iops.back().first; + auto inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; + auto xOp1 = iops1.back().first; + (*matmul) + .set_input_x1(*xOp.get()) + .set_input_x2(*xOp1.get()) + .set_attr_adj_x1(param->transposeA()) + .set_attr_adj_x2(param->transposeB()); + } + mNpuBackend->setOutputOps(mOp, {matmul}, outputs); + } else { + shared_ptr matmul(new ge::op::MatMul(opName)); + if (isConst0 && !isConst1) { + auto inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; + auto xOp1 = iops1.back().first; + (*matmul) + .set_input_x1(mConst) + .set_input_x2(*xOp1.get()) + .set_attr_transpose_x1(param->transposeA()) + .set_attr_transpose_x2(param->transposeB()); + } else if (!isConst0 && isConst1) { + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + auto xOp = iops.back().first; + (*matmul) + .set_input_x1(*xOp.get()) + .set_input_x2(mConst) + .set_attr_transpose_x1(param->transposeA()) + .set_attr_transpose_x2(param->transposeB()); + } else { + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; + auto xOp = iops.back().first; + auto inputIndex1 = mOp->inputIndexes()->data()[1]; + auto iops1 = mNpuBackend->mGrapMap[inputIndex1]; + auto xOp1 = 
iops1.back().first; + (*matmul) + .set_input_x1(*xOp.get()) + .set_input_x2(*xOp1.get()) + .set_attr_transpose_x1(param->transposeA()) + .set_attr_transpose_x2(param->transposeB()); + } + mNpuBackend->setOutputOps(mOp, {matmul}, outputs); } return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUPack.cpp b/source/backend/hiai/execution/NPUPack.cpp index a2b3aa2fb..593d8691e 100644 --- a/source/backend/hiai/execution/NPUPack.cpp +++ b/source/backend/hiai/execution/NPUPack.cpp @@ -17,17 +17,18 @@ NPUPack::NPUPack(MNN::Backend *b, const MNN::Op *op, const std::vector ErrorCode NPUPack::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); - + auto xOp = mNpuBackend->getInputOps(mOp); auto opName = mOp->name()->str(); shared_ptr pack(new hiai::op::Pack(opName)); - auto param = mOp->main_as_PackParam(); - - auto xOp = mNpuBackend->getInputOps(mOp); - (*pack) - .set_dynamic_input_x(0, *xOp.get()) - .set_attr_axis(axisFormat(inputs[0], param->axis())); - + int64_t N = inputs.size(); + (*pack).create_dynamic_input_x(N).set_attr_axis(param->axis()).set_attr_N(N); + for (int32_t i = 0; i < inputs.size(); i++) { + auto inputIndex = mOp->inputIndexes()->data()[i]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + (*pack).set_dynamic_input_x(i+1, *xOp.get()); + } mNpuBackend->setOutputOps(mOp, {pack}, outputs); return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUPack.hpp b/source/backend/hiai/execution/NPUPack.hpp index a4b6dd03d..1470abb43 100644 --- a/source/backend/hiai/execution/NPUPack.hpp +++ b/source/backend/hiai/execution/NPUPack.hpp @@ -19,8 +19,6 @@ class NPUPack : public NPUCommonExecution { NPUPack(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); virtual ~NPUPack() = default; - -private: }; } // namespace MNN diff --git a/source/backend/hiai/execution/NPUPadding.cpp b/source/backend/hiai/execution/NPUPadding.cpp index cc7cf4644..a9ce6bff5 100644 --- a/source/backend/hiai/execution/NPUPadding.cpp +++ b/source/backend/hiai/execution/NPUPadding.cpp @@ -22,9 +22,9 @@ NPUPadding::NPUPadding(Backend *b, const Op *op, const std::vector &in //MNN_PRINT("Padding input1->buffer().dim[0].extent=%d\n",input1->buffer().dim[0].extent); if (input1->buffer().dim[0].extent == 3) { mPadData = {0, 0, data[4], data[5], data[0], data[1], data[2], data[3]}; - } else if (input1->buffer().dim[0].extent == 4) { + } else if ((input1->buffer().dim[0].extent == 4) || (input1->buffer().dim[0].extent == 8)) { mPadData = {data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]}; - } + } // om input weight const op mConst = hiai::op::Const(opName + "_w_const"); { @@ -41,12 +41,11 @@ ErrorCode NPUPadding::onResize(const std::vector &inputs, const std::v auto opName = mOp->name()->str(); auto xOp = mNpuBackend->getInputOps(mOp); - shared_ptr padding(new hiai::op::Pad(opName)); - + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; (*padding).set_input_x(*xOp.get()).set_input_paddings(mConst); - - mNpuBackend->setOutputOps(mOp, {padding}, outputs); return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUPermute.cpp b/source/backend/hiai/execution/NPUPermute.cpp new file mode 100644 index 000000000..a16178604 --- /dev/null +++ b/source/backend/hiai/execution/NPUPermute.cpp @@ -0,0 +1,41 @@ +// +// 
NPUPermute.cpp +// MNN +// +// Created by MNN on b'2020/10/15'. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPUPermute.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPUPermute::NPUPermute(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) { +} + +ErrorCode NPUPermute::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + auto opName = mOp->name()->str(); + auto xOp = mNpuBackend->getInputOps(mOp); + shared_ptr permute(new hiai::op::Permute(opName)); + + auto param = mOp->main_as_Permute(); + auto axis = param->dims(); + int32_t size = param->dims()->size(); + vector dims; + for (int32_t i = 0; i < size; i++) { + int32_t index = axis->Get(i); + dims.push_back(index); + } + int index = mOp->inputIndexes()->data()[0]; + (*permute).set_input_x(*xOp.get()).set_attr_order(dims); + mNpuBackend->setOutputOps(mOp, {permute}, outputs); + return NO_ERROR; +} + +NPUCreatorRegister> __permute_op(OpType_Permute); + +} // namespace MNN \ No newline at end of file diff --git a/source/backend/hiai/execution/NPUPermute.hpp b/source/backend/hiai/execution/NPUPermute.hpp new file mode 100644 index 000000000..a68420d22 --- /dev/null +++ b/source/backend/hiai/execution/NPUPermute.hpp @@ -0,0 +1,28 @@ +// +// NPUPermute.hpp +// MNN +// +// Created by MNN on b'2020/10/15'. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef NPUDEMO_NPUPermute_HPP +#define NPUDEMO_NPUPermute_HPP + +#include "NPUCommonExecution.hpp" +#include "NPUBackend.hpp" + +namespace MNN { + +class NPUPermute : public NPUCommonExecution { +public: + NPUPermute(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUPermute() = default; +private: + vector mNHWC{0, 1, 2, 3}; + vector mNCHW{0, 2, 3, 1}; +}; +} // namespace MNN + +#endif // NPUDEMO_NPUPermute_HPP diff --git a/source/backend/hiai/execution/NPUPooling.cpp b/source/backend/hiai/execution/NPUPooling.cpp index 2f9c2efe5..c9a2294e8 100644 --- a/source/backend/hiai/execution/NPUPooling.cpp +++ b/source/backend/hiai/execution/NPUPooling.cpp @@ -36,6 +36,7 @@ ErrorCode NPUPooling::onResize(const std::vector &inputs, const std::v } // 0:max pooling 1:avg pooling 2:L2 pooling + bool ceilMode = poolParam->ceilModel(); auto mode = 0; // TODO if (PoolType_MAXPOOL == poolParam->type()) { mode = 0; @@ -53,13 +54,25 @@ ErrorCode NPUPooling::onResize(const std::vector &inputs, const std::v int64_t strideWidth = std::max(poolParam->strideX(), 1); int64_t strideHeight = std::max(poolParam->strideY(), 1); - + vector pads; + if (poolParam->pads() != nullptr) { + int32_t size = poolParam->pads()->size() / 2; + for (int32_t i = 0; i < size; i++) { + pads.push_back(static_cast(poolParam->pads()->data()[i])); + pads.push_back(static_cast(poolParam->pads()->data()[i + size])); + } + } else { + pads.push_back(static_cast(poolParam->padY())); + pads.push_back(static_cast(poolParam->padY())); + pads.push_back(static_cast(poolParam->padX())); + pads.push_back(static_cast(poolParam->padX())); + } if (poolParam->isGlobal() == true && kernelH%2 == 0 && kernelW%2==0 && kernelH*kernelW >65535) { shared_ptr pooling2X2(new hiai::op::PoolingD(opName+"_2x2")); (*pooling2X2) .set_input_x(*xOp.get()).set_attr_data_mode(0) - .set_attr_pad_mode(0).set_attr_ceil_mode(0) + 
.set_attr_pad_mode(0).set_attr_ceil_mode(ceilMode) .set_attr_mode(mode) .set_attr_pad(ge::AttrValue::LIST_INT({0, 0, 0, 0})) // 上下左右 .set_attr_window(ge::AttrValue::LIST_INT({2, 2})) @@ -69,10 +82,9 @@ ErrorCode NPUPooling::onResize(const std::vector &inputs, const std::v .set_input_x(*pooling2X2.get()) .set_attr_data_mode(data_mode) // data_mode, DOMI_CAFFE_DATA_MODE =0, TENSORFLOW_DATA_MODE = 1. TODO .set_attr_pad_mode(pad_mode) - .set_attr_ceil_mode(0) // pooling ceil mode, 0: DOMI_POOLING_CEIL, 1:DOMI_POOLING_FLOOR + .set_attr_ceil_mode(ceilMode) // pooling ceil mode, 0: DOMI_POOLING_CEIL, 1:DOMI_POOLING_FLOOR .set_attr_mode(mode) - .set_attr_pad(ge::AttrValue::LIST_INT( - {poolParam->padY(), poolParam->padY(), poolParam->padX(), poolParam->padX()})) // 上下左右 + .set_attr_pad(pads) // 上下左右 .set_attr_window(ge::AttrValue::LIST_INT({kernelH/2, kernelW/2})) .set_attr_stride(ge::AttrValue::LIST_INT({strideHeight, strideWidth})) .set_attr_global_pooling(poolParam->isGlobal()); @@ -82,10 +94,9 @@ ErrorCode NPUPooling::onResize(const std::vector &inputs, const std::v .set_input_x(*xOp.get()) .set_attr_data_mode(data_mode) // data_mode, DOMI_CAFFE_DATA_MODE =0, TENSORFLOW_DATA_MODE = 1. TODO .set_attr_pad_mode(pad_mode) - .set_attr_ceil_mode(0) // pooling ceil mode, 0: DOMI_POOLING_CEIL, 1:DOMI_POOLING_FLOOR + .set_attr_ceil_mode(ceilMode) // pooling ceil mode, 0: DOMI_POOLING_CEIL, 1:DOMI_POOLING_FLOOR .set_attr_mode(mode) - .set_attr_pad(ge::AttrValue::LIST_INT( - {poolParam->padY(), poolParam->padY(), poolParam->padX(), poolParam->padX()})) // 上下左右 + .set_attr_pad(pads) // 上下左右 .set_attr_window(ge::AttrValue::LIST_INT({kernelH, kernelW})) .set_attr_stride(ge::AttrValue::LIST_INT({strideHeight, strideWidth})) .set_attr_global_pooling(poolParam->isGlobal()); diff --git a/source/backend/hiai/execution/NPUPooling3D.cpp b/source/backend/hiai/execution/NPUPooling3D.cpp new file mode 100644 index 000000000..51ed25407 --- /dev/null +++ b/source/backend/hiai/execution/NPUPooling3D.cpp @@ -0,0 +1,87 @@ +// +// NPUPooling3D.cpp +// MNN +// +// Created by MNN on 2019/09/07. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#include "NPUPooling3D.hpp" +#include "NPUBackend.hpp" + +using namespace std; + +namespace MNN { + +NPUPooling3D::NPUPooling3D(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) { +} + +ErrorCode NPUPooling3D::onResize(const std::vector &inputs, const std::vector &outputs) { + mNpuBackend->setNetworkInput(inputs, mOp); + auto opName = mOp->name()->str(); + shared_ptr poolingD(new hiai::op::PoolingD(opName)); + + auto poolParam = mOp->main_as_Pool3D(); + + // 0:NOTSET, 6:SAME 5:VALID. 
defaul default value is 0:NOTSET + auto pad_mode = 0; + int data_mode = 0; + + if (PoolPadType_VALID == poolParam->padType()) { + pad_mode = 5; + data_mode = 1; + } else if (PoolPadType_SAME == poolParam->padType()) { + pad_mode = 6; + data_mode = 1; + } + + bool ceilMode = 0; + // 0:max Pooling3D 1:avg Pooling3D 2:L2 Pooling3D + auto mode = 0; // TODO + if (PoolType_MAXPOOL == poolParam->type()) { + mode = 0; + } else if (PoolType_AVEPOOL == poolParam->type()) { + mode = 1; + } + int64_t kernelH = 1; + int64_t kernelW = 1; + if(poolParam->isGlobal() == true) { + kernelH = inputs[0]->height(); + kernelW = inputs[0]->width(); + } + + auto xOp = mNpuBackend->getInputOps(mOp); + + int64_t strideWidth = 1; + int64_t strideHeight = 1; + vector pads; + if (poolParam->pads() != nullptr) { + int32_t size = poolParam->pads()->size() / 2; + for (int32_t i = 0; i < size; i++) { + pads.push_back(static_cast(poolParam->pads()->data()[i])); + pads.push_back(static_cast(poolParam->pads()->data()[i + size])); + } + } else { + pads.push_back(0); + pads.push_back(0); + pads.push_back(0); + pads.push_back(0); + } + (*poolingD) + .set_input_x(*xOp.get()) + .set_attr_data_mode(data_mode) // data_mode, DOMI_CAFFE_DATA_MODE =0, TENSORFLOW_DATA_MODE = 1. TODO + .set_attr_pad_mode(pad_mode) + .set_attr_ceil_mode(0) // Pooling3D ceil mode, 0: DOMI_Pooling3D_CEIL, 1:DOMI_Pooling3D_FLOOR + .set_attr_mode(mode) + .set_attr_pad(pads) // 上下左右 + .set_attr_window(ge::AttrValue::LIST_INT({kernelH, kernelW})) + .set_attr_stride(ge::AttrValue::LIST_INT({strideHeight, strideWidth})) + .set_attr_global_pooling(true); + + mNpuBackend->setOutputOps(mOp, {poolingD}, outputs); + return NO_ERROR; +} + +NPUCreatorRegister> __Pooling3D_op(OpType_Pooling3D); + +} // namespace MNN \ No newline at end of file diff --git a/source/backend/hiai/execution/NPUPooling3D.hpp b/source/backend/hiai/execution/NPUPooling3D.hpp new file mode 100644 index 000000000..b529311eb --- /dev/null +++ b/source/backend/hiai/execution/NPUPooling3D.hpp @@ -0,0 +1,25 @@ +// +// NPUPooling3D.hpp +// MNN +// +// Created by MNN on 2019/09/07. +// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef MNN_NPUPooling3D_HPP +#define MNN_NPUPooling3D_HPP + +#include "NPUCommonExecution.hpp" + +namespace MNN { + +class NPUPooling3D : public NPUCommonExecution { +public: + NPUPooling3D(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUPooling3D() = default; +}; + +} // namespace MNN + +#endif // MNN_NPUPooling3D_HPP diff --git a/source/backend/hiai/execution/NPUReduction.cpp b/source/backend/hiai/execution/NPUReduction.cpp index 560028ef7..5927ebb54 100644 --- a/source/backend/hiai/execution/NPUReduction.cpp +++ b/source/backend/hiai/execution/NPUReduction.cpp @@ -16,18 +16,6 @@ namespace MNN { NPUReduction::NPUReduction(MNN::Backend *b, const MNN::Op *op, const std::vector &inputs, const std::vector &outputs) : NPUCommonExecution(b, op) { } -vector NPUReduction::convertAxis(vector origAxis, Tensor * input) -{ - vector newAxis(origAxis.size(),0); - int step = TensorUtils::getDescribe(input)->dimensionFormat == MNN_DATA_FORMAT_NCHW ? 
0 : 1; - int index = step + (input->buffer().dimensions-1)*2; - for (size_t i = 0; i < origAxis.size(); i++) { - newAxis[i] = axisMap[index][origAxis[i]]; - MNN_PRINT("i = %d, newAxis[i] = %ld\n",i,newAxis[i]); - } - return newAxis; -} - ErrorCode NPUReduction::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); @@ -37,41 +25,40 @@ ErrorCode NPUReduction::onResize(const std::vector &inputs, const std: auto xOp = mNpuBackend->getInputOps(mOp); - vector origAxis; - vector axis; + vector origAxis; auto reduct = mOp->main_as_ReductionParam(); - if (nullptr != reduct->dim()) { - for (int i = 0; i < reduct->dim()->size(); ++i) { - origAxis.push_back(reduct->dim()->data()[i]); - } - }else if(inputs.size() == 2){ - for (int i = 0; i < inputs[1]->length(0);++i) { + + if (inputs.size() >= 2) { + for (int i = 0; i < inputs[1]->elementSize(); ++i) { int32_t *reduce_dim = inputs[1]->host(); origAxis.push_back(reduce_dim[i]); } - }else{ + } else if (nullptr != reduct->dim()) { + for (int i = 0; i < reduct->dim()->size(); ++i) { + origAxis.push_back(reduct->dim()->data()[i]); + } + } else { MNN_ASSERT(false); } - - axis = convertAxis(origAxis,inputs[0]); - mConstAxis = hiai::op::Const(opName + "_axis"); { - ge::TensorDesc fdesc(ge::Shape({static_cast(axis.size())}), ge::FORMAT_ND, ge::DT_INT32); + ge::TensorDesc fdesc(ge::Shape({static_cast(origAxis.size())}), ge::FORMAT_ND, ge::DT_INT32); ge::TensorPtr constTensor = std::make_shared(); constTensor->SetTensorDesc(fdesc); - constTensor->SetData((uint8_t *)(axis.data()), axis.size()*sizeof(float)); + constTensor->SetData((uint8_t *)(origAxis.data()), origAxis.size()*sizeof(int32_t)); mConstAxis.set_attr_value(constTensor); } - std::vector shapeDims (tensorShapeFormat(outputs[0]).begin(), tensorShapeFormat(outputs[0]).end()); + vector dims; + for (int32_t i = 0; i < outputs[0]->buffer().dimensions; i++) { + dims.push_back(outputs[0]->buffer().dim[i].extent); + } shapeConst = hiai::op::Const(opName + "_shape_const"); { - ge::TensorDesc fdesc(ge::Shape({static_cast(shapeDims.size())}), + ge::TensorDesc fdesc(ge::Shape({static_cast(dims.size())}), ge::FORMAT_NCHW, ge::DT_INT32); ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)shapeDims.data(), shapeDims.size() * sizeof(int32_t)); - + filter->SetData((uint8_t *)dims.data(), dims.size() * sizeof(int32_t)); shapeConst.set_attr_value(filter); } @@ -101,6 +88,10 @@ ErrorCode NPUReduction::onResize(const std::vector &inputs, const std: } } else if(type == ReductionType_ANY) { shared_ptr reduction(new ge::op::ReduceAll(opName)); + vector axis; + for (int32_t j = 0; j < origAxis.size(); j++) { + axis.push_back(static_cast(origAxis[j])); + } (*reduction) .set_input_x(*xOp.get()).set_attr_axes(axis) .set_attr_keep_dims(mOp->main_as_ReductionParam()->keepDims()); diff --git a/source/backend/hiai/execution/NPUReduction.hpp b/source/backend/hiai/execution/NPUReduction.hpp index f10368b1b..ad9ca1e14 100644 --- a/source/backend/hiai/execution/NPUReduction.hpp +++ b/source/backend/hiai/execution/NPUReduction.hpp @@ -21,15 +21,8 @@ class NPUReduction : public NPUCommonExecution { virtual ~NPUReduction() = default; private: - vector convertAxis(vector origAxis, Tensor * input); hiai::op::Const mConstAxis; hiai::op::Const shapeConst; - - int axisMap[8][4] = {{0},{1}, //mNCHW1d,mNHWC1d - {0, 1},{2, 1}, //mNCHW2d,mNHWC2d - {0, 1, 2},{2, 3, 1}, //mNCHW3d,mNHWC3d - {0, 1, 2, 3},{0, 2, 3, 1}, //mNCHW4d,mNHWC4d - }; }; } // 
namespace MNN diff --git a/source/backend/hiai/execution/NPUReshape.cpp b/source/backend/hiai/execution/NPUReshape.cpp index 74a60068c..825173434 100644 --- a/source/backend/hiai/execution/NPUReshape.cpp +++ b/source/backend/hiai/execution/NPUReshape.cpp @@ -32,13 +32,9 @@ static bool isSameDims(Tensor * input,Tensor * output) ErrorCode NPUReshape::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); - - auto input = inputs[0]; auto opName = mOp->name()->str(); shared_ptr reshape(new hiai::op::Reshape(opName)); - - auto shapeFormt = tensorShapeFormat(outputs[0]); - std::vector shape(shapeFormt.begin(), shapeFormt.end()); + std::vector shape = outputs[0]->shape(); shapeConst = hiai::op::Const(opName + "_shape_const"); { ge::TensorDesc fdesc(ge::Shape({static_cast(shape.size())}), @@ -46,46 +42,19 @@ ErrorCode NPUReshape::onResize(const std::vector &inputs, const std::v ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); filter->SetData((uint8_t *)shape.data(), shape.size() * sizeof(int32_t)); - shapeConst.set_attr_value(filter); } - - auto inputDims = tensorShapeFormat(inputs[0]); - auto shapeDims = tensorShapeFormat(outputs[0]); - + auto xOp = mNpuBackend->getInputOps(mOp); auto inputIndex = mOp->inputIndexes()->data()[0]; - auto iops = mNpuBackend->mGrapMap[inputIndex]; // x - auto xOp = iops.back().first; - - if ((TensorUtils::getDescribe(input)->dimensionFormat != MNN::MNN_DATA_FORMAT_NHWC) || - (isSameDims(input, outputs[0]) || (inputDims == shapeDims))) { - (*reshape).set_input_x(*xOp).set_input_shape(shapeConst); - mNpuBackend->setOutputOps(mOp, {reshape}, outputs); + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*reshape).set_input_x(*xOp.get()); } else { - shared_ptr permute1(new hiai::op::Permute(opName+"_perm1")); - shared_ptr permute2(new hiai::op::Permute(opName+"_perm2")); - (*permute1) - .set_input_x(*xOp.get()) - .set_attr_order(ge::AttrValue::LIST_INT({0,2,3,1})); - vector nhwcShape = {static_cast(shapeDims[0]), static_cast(shapeDims[2]), - static_cast(shapeDims[3]), static_cast(shapeDims[1])}; - nhwshapeConst = hiai::op::Const(opName + "_nhwshape_const"); - { - ge::TensorDesc fdesc(ge::Shape({4}), ge::FORMAT_NCHW, ge::DT_INT32); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)nhwcShape.data(), nhwcShape.size() * sizeof(int32_t)); - - nhwshapeConst.set_attr_value(filter); - } - (*reshape) - .set_input_x(*permute1.get()) - .set_input_shape(nhwshapeConst); - (*permute2) - .set_input_x(*reshape.get()) - .set_attr_order(ge::AttrValue::LIST_INT({0,3,1,2})); - mNpuBackend->setOutputOps(mOp, {permute1,reshape,permute2}, outputs); + (*reshape).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); } + (*reshape).set_input_shape(shapeConst); + mNpuBackend->setOutputOps(mOp, {reshape}, outputs); return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUScale.cpp b/source/backend/hiai/execution/NPUScale.cpp index e7633effc..2a6e59337 100644 --- a/source/backend/hiai/execution/NPUScale.cpp +++ b/source/backend/hiai/execution/NPUScale.cpp @@ -35,7 +35,6 @@ ErrorCode NPUScale::onResize(const std::vector &inputs, const std::vec ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); filter->SetData((uint8_t *)scaleData->data(), scaleData->size() * sizeof(float)); - mConst_fliter.set_attr_value(filter); } // 
om input bias const op @@ -45,14 +44,32 @@ ErrorCode NPUScale::onResize(const std::vector &inputs, const std::vec ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); filter->SetData((uint8_t *)biasData->data(), biasData->size() * sizeof(float)); - mConst_bias.set_attr_value(filter); } - - (*scale).set_input_x(*xOp.get()).set_input_scale(mConst_fliter).set_input_bias(mConst_bias); - - mNpuBackend->setOutputOps(mOp, {scale}, outputs); - + if (inputs[0]->buffer().dimensions == 2) { + vector shape; + for (int32_t i = 0; i < inputs[0]->buffer().dimensions; i++) { + shape.push_back(inputs[0]->buffer().dim[i].extent); + } + for (int32_t i = inputs[0]->buffer().dimensions; i < 4; i++) { + shape.push_back(1); + } + shapeConst = hiai::op::Const(opName + "_shape_const"); + { + ge::TensorDesc fdesc(ge::Shape({static_cast(shape.size())}), ge::FORMAT_NCHW, ge::DT_INT32); // in o h w ? + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)shape.data(), shape.size() * sizeof(int32_t)); + shapeConst.set_attr_value(filter); + } + shared_ptr reshape(new hiai::op::Reshape(opName + "_reshape")); + (*reshape).set_input_x(*xOp.get()).set_input_shape(shapeConst); + (*scale).set_input_x(*reshape.get()).set_input_scale(mConst_fliter).set_input_bias(mConst_bias); + mNpuBackend->setOutputOps(mOp, {reshape, scale}, outputs); + } else { + (*scale).set_input_x(*xOp.get()).set_input_scale(mConst_fliter).set_input_bias(mConst_bias); + mNpuBackend->setOutputOps(mOp, {scale}, outputs); + } return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUScale.hpp b/source/backend/hiai/execution/NPUScale.hpp index 4bd90515e..aaccad4c6 100644 --- a/source/backend/hiai/execution/NPUScale.hpp +++ b/source/backend/hiai/execution/NPUScale.hpp @@ -23,6 +23,7 @@ class NPUScale : public NPUCommonExecution { private: hiai::op::Const mConst_fliter; hiai::op::Const mConst_bias; + hiai::op::Const shapeConst; }; } // namespace MNN diff --git a/source/backend/hiai/execution/NPUSlice.cpp b/source/backend/hiai/execution/NPUSlice.cpp index 0a846bc75..99ea9310c 100644 --- a/source/backend/hiai/execution/NPUSlice.cpp +++ b/source/backend/hiai/execution/NPUSlice.cpp @@ -23,23 +23,25 @@ ErrorCode NPUSlice::onResize(const std::vector &inputs, const std::vec auto param = mOp->main_as_Slice(); auto axis = param->axis(); - if (axis < 0) { - axis = axis + inputs[0]->dimensions(); + int64_t slice_num = 0; + if (param->slicePoints() != nullptr) { + if (param->slicePoints()->size() < outputs.size()) { + slice_num = static_cast(outputs.size()); + } else if (param->slicePoints()->size() == 1) { + slice_num = static_cast(param->slicePoints()->Get(0)); + } else { + slice_num = static_cast(param->slicePoints()->size()); + } + } else { + slice_num = static_cast(outputs.size()); } - - if(TensorUtils::getDescribe(inputs[0])->dimensionFormat == MNN_DATA_FORMAT_NHWC){ - axis = mNCHW[axis]; - }else{ - axis = mNHWC[axis]; - } - auto xOp = mNpuBackend->getInputOps(mOp); (*slice) .set_input_x(*xOp.get()) .set_attr_split_dim(axis) - .set_attr_num_split(outputs.size()) - .create_dynamic_output_y(outputs.size()); + .set_attr_num_split(slice_num) + .create_dynamic_output_y(slice_num); mNpuBackend->setOutputOps(mOp, {slice}, outputs); return NO_ERROR; diff --git a/source/backend/hiai/execution/NPUSlice.hpp b/source/backend/hiai/execution/NPUSlice.hpp index 75c4b48c2..7a64bed52 100644 --- a/source/backend/hiai/execution/NPUSlice.hpp +++ b/source/backend/hiai/execution/NPUSlice.hpp @@ -19,10 +19,6 @@ 
class NPUSlice : public NPUCommonExecution { NPUSlice(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); virtual ~NPUSlice() = default; - -private: - vector mNHWC{0, 1, 2, 3}; - vector mNCHW{0, 2, 3, 1}; }; } // namespace MNN diff --git a/source/backend/hiai/execution/NPUSliceTf.cpp b/source/backend/hiai/execution/NPUSliceTf.cpp index b7bb518be..45c977b86 100644 --- a/source/backend/hiai/execution/NPUSliceTf.cpp +++ b/source/backend/hiai/execution/NPUSliceTf.cpp @@ -25,38 +25,33 @@ NPUSliceTf::NPUSliceTf(MNN::Backend *b, const MNN::Op *op, const std::vectorbatch(), input1->channel(), input1->height(), input1->width()}), ge::FORMAT_NCHW, ge::DT_FLOAT); // in o h w ? + ge::TensorDesc fdesc(ge::Shape({input1->elementSize()}), ge::FORMAT_NCHW, ge::DT_INT32); // in o h w ? ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)input1->host(), input1->elementSize() * sizeof(float)); + filter->SetData((uint8_t *)input1->host(), input1->elementSize() * sizeof(int32_t)); mConst_start.set_attr_value(filter); } mConst_size = hiai::op::Const(opName + "_size_const"); { auto input1 = inputs[2]; - ge::TensorDesc fdesc(ge::Shape({input1->batch(), input1->channel(), input1->height(), input1->width()}), ge::FORMAT_NCHW, ge::DT_FLOAT); // in o h w ? + ge::TensorDesc fdesc(ge::Shape({input1->elementSize()}), ge::FORMAT_NCHW, ge::DT_INT32); // in o h w ? ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)input1->host(), input1->elementSize() * sizeof(float)); + filter->SetData((uint8_t *)input1->host(), input1->elementSize() * sizeof(int32_t)); mConst_size.set_attr_value(filter); } } ErrorCode NPUSliceTf::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); - auto opName = mOp->name()->str(); - shared_ptr slice(new hiai::op::Slice(opName)); - auto xOp = mNpuBackend->getInputOps(mOp); - auto dims = outputs[0]->shape(); - (*slice).set_input_x(*xOp) - .set_input_offsets(mConst_start) - .set_input_size(mConst_size); + .set_input_offsets(mConst_start) + .set_input_size(mConst_size); mNpuBackend->setOutputOps(mOp, {slice}, outputs); return NO_ERROR; diff --git a/source/backend/hiai/execution/NPUSoftmax.cpp b/source/backend/hiai/execution/NPUSoftmax.cpp index f5d5b9c5d..289105f8e 100644 --- a/source/backend/hiai/execution/NPUSoftmax.cpp +++ b/source/backend/hiai/execution/NPUSoftmax.cpp @@ -22,52 +22,11 @@ ErrorCode NPUSoftmax::onResize(const std::vector &inputs, const std::v auto param = mOp->main_as_Axis(); auto xOp = mNpuBackend->getInputOps(mOp); + shared_ptr softmax(new hiai::op::Softmax(opName)); - auto shape = tensorShapeFormat(inputs[0]); - if(shape[1] > 10000 && shape[0] == shape[2] == shape[3] == 1){ - mConstSub = hiai::op::Const(opName + "_sub_n"); - { - ge::TensorDesc fdesc(ge::Shape({1,shape[1],1,1}), ge::FORMAT_NCHW, ge::DT_FLOAT); - ge::TensorPtr constTensor = std::make_shared(); - constTensor->SetTensorDesc(fdesc); - vector x(shape[1], 50); - constTensor->SetData((uint8_t *)(x.data()), x.size()*sizeof(float)); - mConstSub.set_attr_value(constTensor); - } + (*softmax).set_input_x(*xOp.get()).set_attr_axis(param->axis()); - shared_ptr sub(new hiai::op::Sub(opName + "_sub")); - (*sub).set_input_x1(*xOp.get()).set_input_x2(mConstSub); - - shared_ptr exp(new hiai::op::Exp(opName + "_exp")); - (*exp).set_input_x(*sub.get()); - - mConstAxis = 
hiai::op::Const(opName + "_axis"); - { - ge::TensorDesc fdesc(ge::Shape({1}), ge::FORMAT_ND, ge::DT_INT32); - ge::TensorPtr constTensor = std::make_shared(); - constTensor->SetTensorDesc(fdesc); - int x = axisFormat(inputs[0], param->axis()); - constTensor->SetData((uint8_t *)(&x), sizeof(int)); - mConstAxis.set_attr_value(constTensor); - } - shared_ptr sum(new hiai::op::ReduceSum(opName + "_sum")); - (*sum).set_input_x(*exp.get()).set_input_axes(mConstAxis).set_attr_keep_dims(true); - - shared_ptr rec(new hiai::op::Reciprocal(opName + "_rec")); - (*rec).set_input_x(*sum.get()); - - shared_ptr mul(new hiai::op::Mul(opName + "_mul")); - (*mul).set_input_x1(*exp.get()).set_input_x2(*rec.get()); - - mNpuBackend->setOutputOps(mOp, {sub, exp, sum, rec, mul}, outputs); - - }else{ - shared_ptr softmax(new hiai::op::Softmax(opName)); - - (*softmax).set_input_x(*xOp.get()).set_attr_axis(axisFormat(inputs[0], param->axis())); - - mNpuBackend->setOutputOps(mOp, {softmax}, outputs); - } + mNpuBackend->setOutputOps(mOp, {softmax}, outputs); return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUSoftmax.hpp b/source/backend/hiai/execution/NPUSoftmax.hpp index 02866c940..4ae6d0167 100644 --- a/source/backend/hiai/execution/NPUSoftmax.hpp +++ b/source/backend/hiai/execution/NPUSoftmax.hpp @@ -17,9 +17,6 @@ class NPUSoftmax : public NPUCommonExecution { NPUSoftmax(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); virtual ~NPUSoftmax() = default; -private: - hiai::op::Const mConstAxis; - hiai::op::Const mConstSub; }; } // namespace MNN diff --git a/source/backend/hiai/execution/NPUSqueeze.cpp b/source/backend/hiai/execution/NPUSqueeze.cpp index bc515b600..ea51215f8 100644 --- a/source/backend/hiai/execution/NPUSqueeze.cpp +++ b/source/backend/hiai/execution/NPUSqueeze.cpp @@ -18,28 +18,53 @@ NPUSqueeze::NPUSqueeze(Backend *b, const Op *op, const std::vector &in ErrorCode NPUSqueeze::onResize(const std::vector &inputs, const std::vector &outputs) { mNpuBackend->setNetworkInput(inputs, mOp); - auto opName = mOp->name()->str(); - auto shapeFormt = tensorShapeFormat(outputs[0]); - std::vector shapeDims (shapeFormt.begin(), shapeFormt.end()); - shapeConst = hiai::op::Const(opName + "_shape_const"); - { - ge::TensorDesc fdesc(ge::Shape({static_cast(shapeDims.size())}), - ge::FORMAT_NCHW, ge::DT_INT32); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)shapeDims.data(), shapeDims.size() * sizeof(int32_t)); - - shapeConst.set_attr_value(filter); + auto param = mOp->main_as_SqueezeParam(); + auto axis = param->squeezeDims(); + vector ax; + if (axis != nullptr) { + for (int32_t i = 0; i < axis->size(); i++) { + ax.push_back(axis->Get(i)); + } + } else { + ax = {0}; } - - shared_ptr prob(new hiai::op::Reshape(opName)); - auto xOp = mNpuBackend->getInputOps(mOp); - - (*prob).set_input_x(*xOp.get()).set_input_shape(shapeConst); - - mNpuBackend->setOutputOps(mOp, {prob}, outputs); + auto inputIndex = mOp->inputIndexes()->data()[0]; + auto iops = mNpuBackend->mGrapMap[inputIndex]; // x + xOp = iops.back().first; + if (mOp->type() == OpType_Squeeze) { + shared_ptr squeeze(new hiai::op::Squeeze(opName)); + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*squeeze).set_input_x(*xOp.get()); + } else { + (*squeeze).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } + (*squeeze).set_attr_axis(ax); + 
mNpuBackend->setOutputOps(mOp, {squeeze}, outputs); + } else { + shapeConst = hiai::op::Const(opName + "_axis_const"); + if (ax.size() > 1) { + std::cout<<"unsqueeze axis only one element const, not "<< ax.size() << std::endl; + return NOT_SUPPORT; + } + vector axs = {static_cast(ax[0])}; + { + ge::TensorDesc fdesc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)axs.data(), sizeof(int32_t)); + shapeConst.set_attr_value(filter); + } + shared_ptr prob(new hiai::op::ExpandDims(opName)); + if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) { + (*prob).set_input_x(*xOp.get()); + } else { + (*prob).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex])); + } + (*prob).set_input_axis(shapeConst); + mNpuBackend->setOutputOps(mOp, {prob}, outputs); + } return NO_ERROR; } diff --git a/source/backend/hiai/execution/NPUStridedSlice.cpp b/source/backend/hiai/execution/NPUStridedSlice.cpp index 5db02a3d9..296c62026 100644 --- a/source/backend/hiai/execution/NPUStridedSlice.cpp +++ b/source/backend/hiai/execution/NPUStridedSlice.cpp @@ -15,48 +15,28 @@ namespace MNN { NPUStridedSlice::NPUStridedSlice(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs) : MNN::NPUCommonExecution(b,op) { - bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; - bool isConst2 = TensorUtils::getDescribe(inputs[2])->usage==Tensor::InsideDescribe::Usage::CONSTANT; - bool isConst3 = TensorUtils::getDescribe(inputs[3])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + isConst2 = TensorUtils::getDescribe(inputs[2])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + isConst3 = false; + isConst4 = false; auto opName = mOp->name()->str(); Tensor *begin = inputs[1]; Tensor *end = inputs[2]; - Tensor *strided = inputs[3]; - - if(isConst1 == true) { - auto beginShape = convertShapeConstValue(begin, 0); + if(isConst1) { mConst_b = hiai::op::Const(opName + "_b_const"); - { - ge::TensorDesc fdesc(ge::Shape({4}), ge::DT_INT32); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)&beginShape[0], 4*sizeof(int32_t)); - mConst_b.set_attr_value(filter); - } + ge::TensorDesc fdesc(ge::Shape({begin->elementSize()}), ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t*)begin->host(), begin->elementSize()*sizeof(int32_t)); + mConst_b.set_attr_value(filter); } - - if(isConst2 == true) { - auto endShape = convertShapeConstValue(end, 0); + if(isConst2) { mConst_e = hiai::op::Const(opName + "_e_const"); - { - ge::TensorDesc fdesc(ge::Shape({4}), ge::DT_INT32); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)&endShape[0], 4*sizeof(int32_t)); - mConst_e.set_attr_value(filter); - } - } - - if(isConst3 == true) { - auto stridedShape = convertShapeConstValue(strided); - mConst_s = hiai::op::Const(opName + "_s_const"); - { - ge::TensorDesc fdesc(ge::Shape({4}), ge::DT_INT32); - ge::TensorPtr filter = std::make_shared(); - filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)&stridedShape[0], 4*sizeof(int32_t)); - mConst_s.set_attr_value(filter); - } + ge::TensorDesc fdesc(ge::Shape({end->elementSize()}), ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + 
filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t *)end->host(), end->elementSize()*sizeof(int32_t)); + mConst_e.set_attr_value(filter); } auto parameter = mOp->main_as_StridedSliceParam(); beginMask = convertMask(begin, parameter->beginMask(),1); @@ -70,27 +50,68 @@ ErrorCode NPUStridedSlice::onResize(const std::vector &inputs, const s mNpuBackend->setNetworkInput(inputs, mOp); auto opName = mOp->name()->str(); - auto param = mOp->main_as_Axis(); - - shared_ptr stride_slice(new hiai::op::StridedSlice(opName)); + Tensor *axis = nullptr; + Tensor *strides = nullptr; + if (inputs.size() > 3) { + axis = inputs[3]; + isConst3 = TensorUtils::getDescribe(inputs[3])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + } + if (inputs.size() > 4) { + strides = inputs[4]; + isConst4 = TensorUtils::getDescribe(inputs[4])->usage==Tensor::InsideDescribe::Usage::CONSTANT; + } + if (isConst3) { + vector axisdims; + vector axisdims1; + for (int32_t i = 0; i < axis->elementSize(); i++) { + axisdims.push_back(i); + if (count(axisdims1.begin(), axisdims1.end(), axis->host()[i]) == 0) { + axisdims1.push_back(axis->host()[i]); + } + } + mConst_a = hiai::op::Const(opName + "_a_const"); + ge::TensorDesc fdesc(ge::Shape({axis->elementSize()}), ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + if (axisdims1.size() != axisdims.size() || (axisdims.size() == 1 && axisdims1[0] == 1)) { + filter->SetData((uint8_t*)axisdims.data(), axis->elementSize()*sizeof(int32_t)); + } else { + filter->SetData((uint8_t*)axisdims1.data(), axis->elementSize()*sizeof(int32_t)); + } + mConst_a.set_attr_value(filter); + } + if (isConst4) { + mConst_s = hiai::op::Const(opName + "_s_const"); + ge::TensorDesc fdesc(ge::Shape({strides->elementSize()}), ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t*)strides->host(), strides->elementSize()*sizeof(int32_t)); + mConst_s.set_attr_value(filter); + } else { + vector axisdims; + for (int32_t i = 0; i < axis->elementSize(); i++) { + axisdims.push_back(1); + } + mConst_s = hiai::op::Const(opName + "_s_const"); + ge::TensorDesc fdesc(ge::Shape({axis->elementSize()}), ge::DT_INT32); + ge::TensorPtr filter = std::make_shared(); + filter->SetTensorDesc(fdesc); + filter->SetData((uint8_t*)axisdims.data(), axis->elementSize()*sizeof(int32_t)); + mConst_s.set_attr_value(filter); + } + shared_ptr stride_slice(new hiai::op::StridedSliceV2(opName)); auto inputIndex = mOp->inputIndexes()->data()[0]; auto iops = mNpuBackend->mGrapMap[inputIndex]; // x auto xOp = iops.back().first; - - auto parameter = mOp->main_as_StridedSliceParam(); - (*stride_slice) .set_input_x(*xOp.get()) .set_input_begin(mConst_b) - .set_input_end(mConst_e) - .set_input_strides(mConst_s) - .set_attr_begin_mask(beginMask) - .set_attr_end_mask(endMask) - .set_attr_ellipsis_mask(ellipsisMask) - .set_attr_new_axis_mask(newAxisMask) - .set_attr_shrink_axis_mask(shrinkAxisMask); - + .set_input_end(mConst_e); + if (isConst3) { + (*stride_slice).set_input_axes(mConst_a); + } + (*stride_slice).set_input_strides(mConst_s); mNpuBackend->setOutputOps(mOp, {stride_slice}, outputs); return NO_ERROR; diff --git a/source/backend/hiai/execution/NPUStridedSlice.hpp b/source/backend/hiai/execution/NPUStridedSlice.hpp index 94fb400f3..cf2818980 100644 --- a/source/backend/hiai/execution/NPUStridedSlice.hpp +++ b/source/backend/hiai/execution/NPUStridedSlice.hpp @@ -22,10 +22,12 @@ class NPUStridedSlice : public NPUCommonExecution { private: 
     hiai::op::Const mConst_b;
     hiai::op::Const mConst_e;
+    hiai::op::Const mConst_a;
     hiai::op::Const mConst_s;
     bool isConst1;
     bool isConst2;
     bool isConst3;
+    bool isConst4;
     int32_t beginMask;
     int32_t endMask;
     int32_t ellipsisMask;
diff --git a/source/backend/hiai/execution/NPUTile.cpp b/source/backend/hiai/execution/NPUTile.cpp
new file mode 100644
index 000000000..e54c37196
--- /dev/null
+++ b/source/backend/hiai/execution/NPUTile.cpp
@@ -0,0 +1,51 @@
+//
+//  NPUTile.cpp
+//  MNN
+//
+//  Created by MNN on 2019/09/19.
+//  Copyright © 2018, Alibaba Group Holding Limited
+//
+
+#include "NPUTile.hpp"
+#include "NPUBackend.hpp"
+
+using namespace std;
+
+namespace MNN {
+
+NPUTile::NPUTile(Backend *b, const Op *op, const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) : MNN::NPUCommonExecution(b,op) {
+}
+
+ErrorCode NPUTile::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
+    bool isConst2 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
+    auto input = inputs[1];
+    auto opName = mOp->name()->str();
+    if (isConst2) {
+        mConst_m = hiai::op::Const(opName + "_mul_const");
+        vector<int64_t> dims;
+        for (int32_t i = 0; i < input->buffer().dimensions; i++) {
+            dims.push_back(static_cast<int64_t>(input->buffer().dim[i].extent));
+        }
+        ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_INT32);
+        ge::TensorPtr filter = std::make_shared<ge::Tensor>();
+        filter->SetTensorDesc(fdesc);
+        filter->SetData((uint8_t *)input->host<int32_t>(), input->elementSize() * sizeof(int32_t));
+        mConst_m.set_attr_value(filter);
+    }
+    mNpuBackend->setNetworkInput(inputs, mOp);
+    shared_ptr<hiai::op::Tile> tile(new hiai::op::Tile(opName));
+    auto xOp = mNpuBackend->getInputOps(mOp);
+    auto inputIndex = mOp->inputIndexes()->data()[0];
+    auto iops = mNpuBackend->mGrapMap[inputIndex]; // x
+    xOp = iops.back().first;
+    if (isConst2) {
+        (*tile).set_input_x(*xOp.get()).set_input_multiples(mConst_m);
+        mNpuBackend->setOutputOps(mOp, {tile}, outputs);
+        return NO_ERROR;
+    }
+    return NOT_SUPPORT;
+}
+
+NPUCreatorRegister<TypedCreator<NPUTile>> __Tile_op(OpType_Tile);
+
+} // namespace MNN
\ No newline at end of file
diff --git a/source/backend/hiai/execution/NPUTile.hpp b/source/backend/hiai/execution/NPUTile.hpp
new file mode 100644
index 000000000..c6546a6c1
--- /dev/null
+++ b/source/backend/hiai/execution/NPUTile.hpp
@@ -0,0 +1,27 @@
+//
+//  NPUTile.hpp
+//  MNN
+//
+//  Created by MNN on 2019/09/19.
+// Copyright © 2018, Alibaba Group Holding Limited +// + +#ifndef NPUDEMO_NPUTile_HPP +#define NPUDEMO_NPUTile_HPP + +#include "NPUCommonExecution.hpp" + +namespace MNN { + +class NPUTile : public NPUCommonExecution { +public: + NPUTile(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs); + ErrorCode onResize(const std::vector &inputs, const std::vector &outputs); + virtual ~NPUTile() = default; +private: + hiai::op::Const mConst_m; +}; + +} // namespace MNN + +#endif // NPUDEMO_NPUTile_HPP diff --git a/source/backend/hiai/execution/NPUTopKV2.cpp b/source/backend/hiai/execution/NPUTopKV2.cpp index fc3a43968..01b5f9c26 100644 --- a/source/backend/hiai/execution/NPUTopKV2.cpp +++ b/source/backend/hiai/execution/NPUTopKV2.cpp @@ -22,19 +22,17 @@ ErrorCode NPUTopKV2::onResize(const std::vector &inputs, const std::ve auto param = mOp->main_as_Axis(); shared_ptr prob(new hiai::op::TopK(opName)); - + auto xOp = mNpuBackend->getInputOps(mOp); auto inputIndex = mOp->inputIndexes()->data()[0]; auto iops = mNpuBackend->mGrapMap[inputIndex]; // x - auto xOp = iops.back().first; + xOp = iops.back().first; mConst_w = hiai::op::Const(opName + "_w_const"); { - ge::TensorDesc fdesc(ge::Shape({1, 1, 1, 1}), ge::FORMAT_NCHW, - ge::DT_FLOAT); // in o h w ? + ge::TensorDesc fdesc(ge::Shape({1}), ge::FORMAT_NCHW, ge::DT_INT32); ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); - filter->SetData((uint8_t *)inputs[1]->host(), sizeof(float)); - + filter->SetData((uint8_t *)inputs[1]->host(), sizeof(int32_t)); mConst_w.set_attr_value(filter); } diff --git a/source/backend/hiai/execution/NPUTranspose.cpp b/source/backend/hiai/execution/NPUTranspose.cpp index 24480d0e7..78653e6ef 100644 --- a/source/backend/hiai/execution/NPUTranspose.cpp +++ b/source/backend/hiai/execution/NPUTranspose.cpp @@ -16,24 +16,8 @@ namespace MNN { NPUTranspose::NPUTranspose(Backend *b, const Op *op, const std::vector &inputs, const std::vector &outputs) : MNN::NPUCommonExecution(b,op) { const Tensor* perm = inputs[1]; - for (int i = 0; i < perm->buffer().dim[0].extent; i++) { - permutation.push_back(axisFormat(inputs[0], perm->host()[i])); - } - auto dimSize = inputs[0]->buffer().dimensions; - if(dimSize == 3) { - permutation.insert(permutation.begin(),0); - } else if (dimSize == 2) { - permutation.insert(permutation.begin(),0); - permutation.push_back(3); - } else if (dimSize == 1) { - permutation.insert(permutation.begin(),0); - permutation.push_back(2); - permutation.push_back(3); - } - if(TensorUtils::getDescribe(inputs[0])->dimensionFormat == MNN::MNN_DATA_FORMAT_NHWC) - { - std::vector tmp = permutation; - permutation = {tmp[0],tmp[3],tmp[1],tmp[2]}; + for (int i = 0; i < perm->elementSize(); i++) { + permutation.push_back(perm->host()[i]); } } @@ -50,9 +34,9 @@ ErrorCode NPUTranspose::onResize(const std::vector &inputs, const std: mNpuBackend->setNetworkInput(inputs, mOp); auto opName = mOp->name()->str(); - auto xOp = mNpuBackend->getInputOps(mOp); - std::vector shapeDims (tensorShapeFormat(outputs[0]).begin(), tensorShapeFormat(outputs[0]).end()); + + std::vector shapeDims = outputs[0]->shape(); shapeConst = hiai::op::Const(opName + "_shape_const"); { ge::TensorDesc fdesc(ge::Shape({static_cast(shapeDims.size())}), @@ -60,11 +44,10 @@ ErrorCode NPUTranspose::onResize(const std::vector &inputs, const std: ge::TensorPtr filter = std::make_shared(); filter->SetTensorDesc(fdesc); filter->SetData((uint8_t *)shapeDims.data(), shapeDims.size() * sizeof(int32_t)); - 
         shapeConst.set_attr_value(filter);
     }
-
-    MNN_ASSERT((permutation.size()==4));
+
+    MNN_ASSERT((permutation.size() == 4));
     if(isPermNoChange(permutation)) {
         shared_ptr<hiai::op::Reshape> reshape(new hiai::op::Reshape(opName));
diff --git a/source/backend/hiai/execution/NPUUnary.cpp b/source/backend/hiai/execution/NPUUnary.cpp
index a665ee984..9c7a6934a 100644
--- a/source/backend/hiai/execution/NPUUnary.cpp
+++ b/source/backend/hiai/execution/NPUUnary.cpp
@@ -18,31 +18,54 @@ NPUUnary::NPUUnary(MNN::Backend *b, const MNN::Op *op, const std::vector<Tensor
 
 ErrorCode NPUUnary::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
     mNpuBackend->setNetworkInput(inputs, mOp);
     auto opName = mOp->name()->str();
-    auto xOp = mNpuBackend->getInputOps(mOp);
-
-    shared_ptr xOp2;
-
+    auto inputIndex = mOp->inputIndexes()->data()[0];
+    auto iops = mNpuBackend->mGrapMap[inputIndex]; // x
+    auto xOp = iops.back().first;
     auto unary_type = mOp->main_as_UnaryOp()->opType();
-    if(unary_type == UnaryOpOperation_EXP){
+    if (unary_type == UnaryOpOperation_EXP){
         shared_ptr<hiai::op::Exp> unary(new hiai::op::Exp(opName));
         (*unary).set_input_x(*xOp.get());
         mNpuBackend->setOutputOps(mOp, {unary}, outputs);
-    }else if(unary_type == UnaryOpOperation_NEG){
+    } else if (unary_type == UnaryOpOperation_NEG){
         shared_ptr<hiai::op::Neg> unary(new hiai::op::Neg(opName));
         (*unary).set_input_x(*xOp.get());
         mNpuBackend->setOutputOps(mOp, {unary}, outputs);
-    }else if(unary_type == UnaryOpOperation_ABS){
-        shared_ptr<hiai::op::Activation> unary(new hiai::op::Activation(opName));
+    } else if (unary_type == UnaryOpOperation_ABS){
+        shared_ptr<hiai::op::Activation> unary(new hiai::op::Activation(opName + "_abs"));
         (*unary).set_input_x(*xOp.get())
                 .set_attr_mode(6);
         mNpuBackend->setOutputOps(mOp, {unary}, outputs);
-    }else if(unary_type == UnaryOpOperation_SQRT){
+    } else if (unary_type == UnaryOpOperation_SQRT){
         shared_ptr<hiai::op::Sqrt> unary(new hiai::op::Sqrt(opName));
         (*unary).set_input_x(*xOp.get());
         mNpuBackend->setOutputOps(mOp, {unary}, outputs);
-    }else{
+    } else if (unary_type == UnaryOpOperation_HARDSWISH){
+        shared_ptr<hiai::op::HardSwish> unary(new hiai::op::HardSwish(opName));
+        (*unary).set_input_x(*xOp.get());
+        mNpuBackend->setOutputOps(mOp, {unary}, outputs);
+    } else if (unary_type == UnaryOpOperation_RSQRT){
+        shared_ptr<hiai::op::Rsqrt> unary(new hiai::op::Rsqrt(opName));
+        (*unary).set_input_x(*xOp.get());
+        mNpuBackend->setOutputOps(mOp, {unary}, outputs);
+    } else if (unary_type == UnaryOpOperation_SQUARE){
+        shared_ptr<hiai::op::Square> unary(new hiai::op::Square(opName));
+        (*unary).set_input_x(*xOp.get());
+        mNpuBackend->setOutputOps(mOp, {unary}, outputs);
+    } else if (unary_type == UnaryOpOperation_LOG){
+        shared_ptr<hiai::op::Log> unary(new hiai::op::Log(opName));
+        (*unary).set_input_x(*xOp.get());
+        mNpuBackend->setOutputOps(mOp, {unary}, outputs);
+    } else if (unary_type == UnaryOpOperation_GELU || unary_type == UnaryOpOperation_GELU_STANDARD){
+        shared_ptr<hiai::op::Activation> unary(new hiai::op::Activation(opName + "_gelu"));
+        (*unary).set_input_x(*xOp.get()).set_attr_mode(15);
+        mNpuBackend->setOutputOps(mOp, {unary}, outputs);
+    } else if (unary_type == UnaryOpOperation_ERF){
+        shared_ptr<hiai::op::Erf> unary(new hiai::op::Erf(opName));
+        (*unary).set_input_x(*xOp.get());
+        mNpuBackend->setOutputOps(mOp, {unary}, outputs);
+    } else {
         MNN_ERROR("unary not support this case : %d \n", unary_type);
     }
     return NO_ERROR;
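// ---------------------------------------------------------------------------
// Editorial aside (not part of the patch): the NPUUnary::onResize hunk above
// maps each MNN UnaryOpOperation either to a dedicated HiAI operator (Exp,
// Neg, Sqrt, Rsqrt, Square, Log, Erf, HardSwish) or to hiai::op::Activation
// with a numeric mode (6 for ABS, 15 for GELU, as used in this patch). The
// standalone sketch below restates that mapping as a lookup table; the enum,
// struct and function names are illustrative stand-ins, not MNN or HiAI API.
// ---------------------------------------------------------------------------
#include <iostream>
#include <map>
#include <string>

enum class UnaryKind { EXP, NEG, ABS, SQRT, HARDSWISH, RSQRT, SQUARE, LOG, GELU, ERF };

struct HiaiOpChoice {
    std::string opType;  // name of the HiAI IR operator to instantiate
    int activationMode;  // only meaningful when opType == "Activation"
};

// Table-driven equivalent of the if/else chain in NPUUnary::onResize.
HiaiOpChoice chooseHiaiUnaryOp(UnaryKind kind) {
    static const std::map<UnaryKind, HiaiOpChoice> table = {
        {UnaryKind::EXP,       {"Exp", -1}},
        {UnaryKind::NEG,       {"Neg", -1}},
        {UnaryKind::ABS,       {"Activation", 6}},
        {UnaryKind::SQRT,      {"Sqrt", -1}},
        {UnaryKind::HARDSWISH, {"HardSwish", -1}},
        {UnaryKind::RSQRT,     {"Rsqrt", -1}},
        {UnaryKind::SQUARE,    {"Square", -1}},
        {UnaryKind::LOG,       {"Log", -1}},
        {UnaryKind::GELU,      {"Activation", 15}},
        {UnaryKind::ERF,       {"Erf", -1}},
    };
    return table.at(kind);
}

int main() {
    // Example: ABS is lowered to an Activation op running in mode 6.
    auto choice = chooseHiaiUnaryOp(UnaryKind::ABS);
    std::cout << choice.opType << " mode=" << choice.activationMode << "\n";
    return 0;
}
// A single table keeps the operator-name/mode pairs in one place when new
// unary types are added; the explicit branches in the patch are functionally
// equivalent.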