Skip to content

Commit

Permalink
MNN_NPU IR op modify
Browse files Browse the repository at this point in the history
  • Loading branch information
946166920 committed Feb 20, 2024
1 parent 784017d commit 24841e7
Show file tree
Hide file tree
Showing 60 changed files with 1,913 additions and 838 deletions.
71 changes: 36 additions & 35 deletions source/backend/hiai/backend/NPUBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,20 @@ namespace MNN {
if (isInput && mGrapMap.find(inputIndex) == mGrapMap.end()) {
auto opName = string("input") + to_string(inputIndex);
shared_ptr<hiai::op::Data> data(new hiai::op::Data(opName));
auto shape = tensorShapeFormat(inputTensor);
ge::TensorDesc desc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
vector<int64_t> dims;
for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) {
dims.push_back(inputTensor->buffer().dim[i].extent);
}
ge::TensorDesc desc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT);
if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN_DATA_FORMAT::MNN_DATA_FORMAT_NHWC) {
desc.SetFormat(ge::FORMAT_NHWC);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
desc.SetDataType(ge::DT_INT32);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
desc.SetDataType(ge::DT_INT64);
}
data->update_input_desc_x(desc);
// map
vector<pair<shared_ptr<ge::Operator>, string>> ops;
Expand All @@ -248,19 +260,25 @@ namespace MNN {
shared_ptr<hiai::op::Const> mConst(new hiai::op::Const(opName));
{
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
auto shape = tensorShapeFormat(inputTensor);
ge::TensorDesc fdesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
filter->SetTensorDesc(fdesc);
if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN::MNN_DATA_FORMAT_NCHW) {
filter->SetData((uint8_t *)inputTensor->host<float>(), inputTensor->elementSize() * sizeof(float));
mConst->set_attr_value(filter);
} else {
vector<float> temp(inputTensor->elementSize(), 0);
NHWC2NCHW((float*)inputTensor->host<float>(), (float*)temp.data(), shape[0], shape[1], shape[2]*shape[3]);
filter->SetData((uint8_t *)temp.data(), temp.size() * sizeof(float));
mConst->set_attr_value(filter);
vector<int64_t> dims;
for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) {
dims.push_back(inputTensor->buffer().dim[i].extent);
}
ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT);
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
fdesc.SetDataType(ge::DT_INT32);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
fdesc.SetDataType(ge::DT_INT64);
}
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)inputTensor->host<float>(), inputTensor->elementSize() * sizeof(float));
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
filter->SetData((uint8_t *)inputTensor->host<int32_t>(), inputTensor->elementSize() * sizeof(int32_t));
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
filter->SetData((uint8_t *)inputTensor->host<int64_t>(), inputTensor->elementSize() * sizeof(int64_t));
}
mConst->set_attr_value(filter);
}
vector<pair<shared_ptr<ge::Operator>, string>> ops;
Expand Down Expand Up @@ -339,14 +357,7 @@ namespace MNN {
auto index = mInputMap.find((unsigned long)(const_cast<Tensor*>(dstTensor)));
MNN_ASSERT(index != mInputMap.end());
shared_ptr<hiai::INDTensorBuffer> input = inputTensors[index->second];
if(TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW
||TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) {
memcpy(input->GetData(), srcTensor->host<float>(), (size_t)input->GetSize());
} else {
shared_ptr<Tensor> tmpTensor(new Tensor(dstTensor, Tensor::DimensionType::CAFFE, true));
tensorConvert(srcTensor, tmpTensor.get());
memcpy(input->GetData(), tmpTensor->host<float>(), (size_t)tmpTensor->size());
}
memcpy(input->GetData(), srcTensor->host<void>(), (size_t)input->GetSize());
} else if(isOutputCopy){
int index;
bool flag = false;
Expand All @@ -361,18 +372,8 @@ namespace MNN {
return;
}
shared_ptr<hiai::INDTensorBuffer> output = outputTensors[index];
if(TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW
||TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) {
memcpy(dstTensor->buffer().host, output->GetData(), (size_t)output->GetSize());
} else {
auto tmpShape = tensorShapeFormat(srcTensor);
vector<int> srcShape = {(int)tmpShape[0],(int)tmpShape[1],(int)tmpShape[2],(int)tmpShape[3]};
shared_ptr<Tensor> tmpTensor(Tensor::create(srcShape,halide_type_of<float>(),
(void*)(output->GetData()),
Tensor::DimensionType::CAFFE));
auto shape = output->GetTensorDesc();
tensorConvert(tmpTensor.get(), dstTensor);
}
Tensor* tmpTensor = const_cast<Tensor*>(dstTensor);
memcpy(tmpTensor->buffer().host, output->GetData(), (size_t)output->GetSize());
}
#ifdef HIAI_DEBUG
ATrace_endSection();
Expand Down Expand Up @@ -420,7 +421,7 @@ namespace MNN {
model->SetGraph(graph);

hiai::ModelBuildOptions buildOptions;

buildOptions.formatMode = hiai::FormatMode::USE_ORIGIN;
std::ifstream file("quant_param", std::ios::binary | std::ios::ate);
if (!file.is_open()) {
MNN_PRINT("no quant_param config file, build non-quantized model.\n");
Expand Down Expand Up @@ -507,7 +508,7 @@ namespace MNN {

void NPUBackend::setOutputOps(const Op *op, vector<shared_ptr<ge::Operator>>&& HIAI_op,
const std::vector<Tensor *> &outputs){
if(op->type() == OpType_Slice){
if(op->type() == OpType_Slice || op->type() == OpType_TopKV2){
for (size_t i = 0; i < op->outputIndexes()->size(); i++){
auto index = op->outputIndexes()->data()[i];
mSclipMap[index] = i;
Expand Down
35 changes: 35 additions & 0 deletions source/backend/hiai/backend/NPUBackend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,41 @@ namespace MNN {
typedef void *(*fp_ATrace_endSection) (void);
#endif
void NHWC2NCHW(const float* source, float* dest, int b, int c, int area);

// Translate an MNN serialized DataType into the matching HiAI ge::DataType.
// DT_VARIANT is deliberately lowered to DT_FLOAT (the NPU runtime has no
// variant tensor type). Any unmapped value asserts in debug builds, logs the
// raw enum value, and yields DT_UNDEFINED.
static ge::DataType mapDataType(DataType src) {
    switch (src) {
        case DataType_DT_FLOAT:
            return ge::DataType::DT_FLOAT;
        case DataType_DT_DOUBLE:
            return ge::DataType::DT_DOUBLE;
        case DataType_DT_INT32:
            return ge::DataType::DT_INT32;
        case DataType_DT_UINT8:
            return ge::DataType::DT_UINT8;
        case DataType_DT_INT16:
            return ge::DataType::DT_INT16;
        case DataType_DT_INT8:
            return ge::DataType::DT_INT8;
        case DataType_DT_INT64:
            return ge::DataType::DT_INT64;
        case DataType_DT_VARIANT:
            // NOTE: variant payloads are assumed float-backed here.
            return ge::DataType::DT_FLOAT;
        default:
            break;
    }
    MNN_ASSERT(false);
    printf("cast Datatype : %d \n", src);
    return ge::DataType::DT_UNDEFINED;
}
inline std::vector<int64_t> tensorShapeFormat(const Tensor *input, const Tensor *broadCastInput=nullptr) {
auto dimSize = input->buffer().dimensions;
if(broadCastInput != nullptr) {
Expand Down
85 changes: 65 additions & 20 deletions source/backend/hiai/execution/NPUActivation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,84 @@ NPUActivation::NPUActivation(Backend *b, const Op *op, const std::vector<Tensor
ErrorCode NPUActivation::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
mNpuBackend->setNetworkInput(inputs, mOp);
auto opName = mOp->name()->str();



auto xOp = mNpuBackend->getInputOps(mOp);

if(mType == 5){
shared_ptr<hiai::op::PRelu> prelu(new hiai::op::PRelu(opName + "_prelu"));
auto slopePtr = mOp->main_as_PRelu()->slope()->data();
auto slopeSize = mOp->main_as_PRelu()->slope()->size();

mConst_w = hiai::op::Const(opName + "_w_const");
{
ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW,
ge::DT_FLOAT); // in o h w ?
auto inputIndex = mOp->inputIndexes()->data()[0];
auto iops = mNpuBackend->mGrapMap[inputIndex];
xOp = iops.back().first;
if (mType == OpType_PReLU && mOp->main_as_PRelu()->slope() != nullptr) {
if (mOp->main_as_PRelu()->slope()->size() == 1) {
const float* slopePtr = mOp->main_as_PRelu()->slope()->data();
shared_ptr<hiai::op::Activation> relu(new hiai::op::Activation(opName + "_relu"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*relu).set_input_x(*xOp.get());
} else {
(*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*relu)
.set_attr_coef(.000000)
.set_attr_negative_slope(*slopePtr)
.set_attr_mode(mType);
mNpuBackend->setOutputOps(mOp, {relu}, outputs);
} else {
shared_ptr<hiai::op::PRelu> prelu(new hiai::op::PRelu(opName + "_prelu"));
auto slopePtr = mOp->main_as_PRelu()->slope()->data();
auto slopeSize = mOp->main_as_PRelu()->slope()->size();
mConst_w = hiai::op::Const(opName + "_w_const");
ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)slopePtr, slopeSize * sizeof(float));
mConst_w.set_attr_value(filter);
if (inputs[0]->buffer().dimensions < 4) {
std::vector<int32_t> shape;
for (int32_t i = 0; i < inputs[0]->buffer().dimensions; i++) {
shape.push_back(inputs[0]->buffer().dim[i].extent);
}
for (int32_t i = inputs[0]->buffer().dimensions; i < 4; i++) {
shape.push_back(1);
}
shapeConst = hiai::op::Const(opName +"_reshapeConst");
{
ge::TensorDesc fdesc(ge::Shape({static_cast<int64_t>(shape.size())}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)shape.data(), shape.size() * sizeof(int32_t));
shapeConst.set_attr_value(filter);
}
shared_ptr<hiai::op::Reshape> reshape(new hiai::op::Reshape(opName + "_reshape"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*reshape).set_input_x(*xOp.get());
} else {
(*reshape).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*reshape).set_input_shape(shapeConst);
(*prelu).set_input_x(*reshape.get()).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {reshape, prelu}, outputs);
} else {
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*prelu).set_input_x(*xOp.get());
} else {
(*prelu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*prelu).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {prelu}, outputs);
}
}

(*prelu)
.set_input_x(*xOp.get()).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {prelu}, outputs);
}else{
float slope = 0.0;
if (mOp->type() == OpType_ReLU) {
slope = mOp->main_as_Relu()->slope();
mType = 5;
if (slope != 0.0) {
mType = 5;
}
}

shared_ptr<hiai::op::Activation> relu(new hiai::op::Activation(opName + "_relu"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*relu).set_input_x(*xOp.get());
} else {
(*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*relu)
.set_input_x(*xOp.get())
.set_attr_coef(.000000)
.set_attr_negative_slope(slope)
.set_attr_mode(mType);
Expand Down
1 change: 1 addition & 0 deletions source/backend/hiai/execution/NPUActivation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class NPUActivation : public NPUCommonExecution {
virtual ~NPUActivation() = default;
private:
hiai::op::Const mConst_w;
hiai::op::Const shapeConst;
int mType;
};

Expand Down
2 changes: 1 addition & 1 deletion source/backend/hiai/execution/NPUArgMax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ErrorCode NPUArgMax::onResize(const std::vector<Tensor *> &inputs, const std::ve
// om input weight const op
mConst_axis = hiai::op::Const(opName + "_w_const");
{
auto aixs = axisFormat(inputs[0], argMaxParam->axis());
auto aixs = argMaxParam->axis();
ge::TensorDesc fdesc(ge::Shape({1}),ge::DT_INT32);
ge::TensorPtr axis = std::make_shared<ge::Tensor>();
axis->SetTensorDesc(fdesc);
Expand Down
93 changes: 93 additions & 0 deletions source/backend/hiai/execution/NPUBatchMatMul.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//
// NPUBatchMatMul.cpp
// MNN
//
//  Created by MNN on 2020/10/15.
// Copyright © 2018, Alibaba Group Holding Limited
//

#include "NPUBatchMatMul.hpp"
#include "NPUBackend.hpp"

using namespace std;

namespace MNN {

// Constructor: when exactly one of the two matmul operands is a graph
// constant, pre-build a HiAI Const op (mConst) holding its data so onResize
// can wire it straight into the BatchMatMul node. If both or neither operand
// is constant, nothing is prepared here.
NPUBatchMatMul::NPUBatchMatMul(MNN::Backend *b, const MNN::Op *op, const std::vector<Tensor *> &inputs, const std::vector<MNN::Tensor *> &outputs) : NPUCommonExecution(b, op) {
    auto opName = mOp->name()->str();

    const bool constLhs = TensorUtils::getDescribe(inputs[0])->usage == Tensor::InsideDescribe::Usage::CONSTANT;
    const bool constRhs = TensorUtils::getDescribe(inputs[1])->usage == Tensor::InsideDescribe::Usage::CONSTANT;

    // Pick the single constant operand, if there is exactly one.
    Tensor* constTensor = nullptr;
    if (constLhs != constRhs) {
        constTensor = constLhs ? inputs[0] : inputs[1];
    }
    if (constTensor == nullptr) {
        return;
    }

    mConst = ge::op::Const(opName + "_w_const");
    ge::TensorPtr weight = std::make_shared<ge::Tensor>();

    // Mirror the tensor's dimensions into the HiAI shape descriptor.
    vector<int64_t> shape;
    for (int32_t axis = 0; axis < constTensor->buffer().dimensions; ++axis) {
        shape.push_back(constTensor->buffer().dim[axis].extent);
    }
    ge::TensorDesc weightDesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
    // Only int32 and float payloads are handled; everything else is copied
    // as float (matches the original behavior — int64 is NOT special-cased).
    if (constTensor->getType().code == halide_type_int && constTensor->getType().bits == 32) {
        weightDesc.SetDataType(ge::DT_INT32);
        weight->SetData((uint8_t *)constTensor->host<int32_t>(), constTensor->elementSize() * sizeof(int32_t));
    } else {
        weight->SetData((uint8_t *)constTensor->host<float>(), constTensor->elementSize() * sizeof(float));
    }
    weight->SetTensorDesc(weightDesc);
    mConst.set_attr_value(weight);
}

// Builds the HiAI BatchMatMul node for this op. If one operand was detected
// as a graph constant in the constructor, the pre-built mConst feeds that
// input slot; otherwise both inputs come from the upstream graph ops.
// Returns NO_ERROR unconditionally (graph wiring cannot fail here).
ErrorCode NPUBatchMatMul::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    mNpuBackend->setNetworkInput(inputs, mOp);
    auto opName = mOp->name()->str();
    bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
    bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
    auto param = mOp->main_as_BatchMatMulParam();
    shared_ptr<hiai::op::BatchMatMul> batchMatMul(new hiai::op::BatchMatMul(opName));
    // Resolve the HiAI op feeding input slot `slot` of this MNN op
    // (the last producer registered for that tensor index wins).
    auto inputOp = [this](int slot) {
        auto inputIndex = mOp->inputIndexes()->data()[slot];
        auto iops = mNpuBackend->mGrapMap[inputIndex];
        return iops.back().first;
    };
    if (isConst0 && !isConst1) {
        (*batchMatMul)
            .set_input_x1(mConst)
            .set_input_x2(*inputOp(1).get());
    } else if (!isConst0 && isConst1) {
        (*batchMatMul)
            .set_input_x1(*inputOp(0).get())
            .set_input_x2(mConst);
    } else {
        (*batchMatMul)
            .set_input_x1(*inputOp(0).get())
            .set_input_x2(*inputOp(1).get());
    }
    // Transpose flags are common to all three wiring cases.
    (*batchMatMul)
        .set_attr_adj_x1(param->adjX())
        .set_attr_adj_x2(param->adjY());
    mNpuBackend->setOutputOps(mOp, {batchMatMul}, outputs);
    return NO_ERROR;
}

NPUCreatorRegister<TypedCreator<NPUBatchMatMul>> __BatchMatMul_op(OpType_BatchMatMul);

} // namespace MNN
Loading

0 comments on commit 24841e7

Please sign in to comment.