Skip to content

Commit

Permalink
MNN_NPU IR op modify
Browse files Browse the repository at this point in the history
  • Loading branch information
946166920 committed Feb 20, 2024
1 parent 784017d commit 24841e7
Show file tree
Hide file tree
Showing 60 changed files with 1,913 additions and 838 deletions.
71 changes: 36 additions & 35 deletions source/backend/hiai/backend/NPUBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,20 @@ namespace MNN {
if (isInput && mGrapMap.find(inputIndex) == mGrapMap.end()) {
auto opName = string("input") + to_string(inputIndex);
shared_ptr<hiai::op::Data> data(new hiai::op::Data(opName));
auto shape = tensorShapeFormat(inputTensor);
ge::TensorDesc desc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
vector<int64_t> dims;
for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) {
dims.push_back(inputTensor->buffer().dim[i].extent);
}
ge::TensorDesc desc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT);
if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN_DATA_FORMAT::MNN_DATA_FORMAT_NHWC) {
desc.SetFormat(ge::FORMAT_NHWC);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
desc.SetDataType(ge::DT_INT32);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
desc.SetDataType(ge::DT_INT64);
}
data->update_input_desc_x(desc);
// map
vector<pair<shared_ptr<ge::Operator>, string>> ops;
Expand All @@ -248,19 +260,25 @@ namespace MNN {
shared_ptr<hiai::op::Const> mConst(new hiai::op::Const(opName));
{
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
auto shape = tensorShapeFormat(inputTensor);
ge::TensorDesc fdesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
filter->SetTensorDesc(fdesc);
if (TensorUtils::getDescribe(inputTensor)->dimensionFormat == MNN::MNN_DATA_FORMAT_NCHW) {
filter->SetData((uint8_t *)inputTensor->host<float>(), inputTensor->elementSize() * sizeof(float));
mConst->set_attr_value(filter);
} else {
vector<float> temp(inputTensor->elementSize(), 0);
NHWC2NCHW((float*)inputTensor->host<float>(), (float*)temp.data(), shape[0], shape[1], shape[2]*shape[3]);
filter->SetData((uint8_t *)temp.data(), temp.size() * sizeof(float));
mConst->set_attr_value(filter);
vector<int64_t> dims;
for(int32_t i = 0; i < inputTensor->buffer().dimensions; i++) {
dims.push_back(inputTensor->buffer().dim[i].extent);
}
ge::TensorDesc fdesc(ge::Shape(dims), ge::FORMAT_NCHW, ge::DT_FLOAT);
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
fdesc.SetDataType(ge::DT_INT32);
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
fdesc.SetDataType(ge::DT_INT64);
}
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)inputTensor->host<float>(), inputTensor->elementSize() * sizeof(float));
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 32) {
filter->SetData((uint8_t *)inputTensor->host<int32_t>(), inputTensor->elementSize() * sizeof(int32_t));
}
if (inputTensor->getType().code == halide_type_int && inputTensor->getType().bits == 64) {
filter->SetData((uint8_t *)inputTensor->host<int64_t>(), inputTensor->elementSize() * sizeof(int64_t));
}
mConst->set_attr_value(filter);
}
vector<pair<shared_ptr<ge::Operator>, string>> ops;
Expand Down Expand Up @@ -339,14 +357,7 @@ namespace MNN {
auto index = mInputMap.find((unsigned long)(const_cast<Tensor*>(dstTensor)));
MNN_ASSERT(index != mInputMap.end());
shared_ptr<hiai::INDTensorBuffer> input = inputTensors[index->second];
if(TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW
||TensorUtils::getDescribe(srcTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) {
memcpy(input->GetData(), srcTensor->host<float>(), (size_t)input->GetSize());
} else {
shared_ptr<Tensor> tmpTensor(new Tensor(dstTensor, Tensor::DimensionType::CAFFE, true));
tensorConvert(srcTensor, tmpTensor.get());
memcpy(input->GetData(), tmpTensor->host<float>(), (size_t)tmpTensor->size());
}
memcpy(input->GetData(), srcTensor->host<void>(), (size_t)input->GetSize());
} else if(isOutputCopy){
int index;
bool flag = false;
Expand All @@ -361,18 +372,8 @@ namespace MNN {
return;
}
shared_ptr<hiai::INDTensorBuffer> output = outputTensors[index];
if(TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NCHW
||TensorUtils::getDescribe(dstTensor)->dimensionFormat == MNN_DATA_FORMAT_NC4HW4 ) {
memcpy(dstTensor->buffer().host, output->GetData(), (size_t)output->GetSize());
} else {
auto tmpShape = tensorShapeFormat(srcTensor);
vector<int> srcShape = {(int)tmpShape[0],(int)tmpShape[1],(int)tmpShape[2],(int)tmpShape[3]};
shared_ptr<Tensor> tmpTensor(Tensor::create(srcShape,halide_type_of<float>(),
(void*)(output->GetData()),
Tensor::DimensionType::CAFFE));
auto shape = output->GetTensorDesc();
tensorConvert(tmpTensor.get(), dstTensor);
}
Tensor* tmpTensor = const_cast<Tensor*>(dstTensor);
memcpy(tmpTensor->buffer().host, output->GetData(), (size_t)output->GetSize());
}
#ifdef HIAI_DEBUG
ATrace_endSection();
Expand Down Expand Up @@ -420,7 +421,7 @@ namespace MNN {
model->SetGraph(graph);

hiai::ModelBuildOptions buildOptions;

buildOptions.formatMode = hiai::FormatMode::USE_ORIGIN;
std::ifstream file("quant_param", std::ios::binary | std::ios::ate);
if (!file.is_open()) {
MNN_PRINT("no quant_param config file, build non-quantized model.\n");
Expand Down Expand Up @@ -507,7 +508,7 @@ namespace MNN {

void NPUBackend::setOutputOps(const Op *op, vector<shared_ptr<ge::Operator>>&& HIAI_op,
const std::vector<Tensor *> &outputs){
if(op->type() == OpType_Slice){
if(op->type() == OpType_Slice || op->type() == OpType_TopKV2){
for (size_t i = 0; i < op->outputIndexes()->size(); i++){
auto index = op->outputIndexes()->data()[i];
mSclipMap[index] = i;
Expand Down
35 changes: 35 additions & 0 deletions source/backend/hiai/backend/NPUBackend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,41 @@ namespace MNN {
typedef void *(*fp_ATrace_endSection) (void);
#endif
void NHWC2NCHW(const float* source, float* dest, int b, int c, int area);

// Translate an MNN serialized DataType into the matching HiAI ge::DataType.
// DT_VARIANT is deliberately lowered to DT_FLOAT (the NPU runtime has no
// variant tensor type). Any unmapped value asserts in debug builds, logs the
// raw enum value, and yields DT_UNDEFINED.
static ge::DataType mapDataType(DataType src) {
    switch (src) {
        case DataType_DT_FLOAT:
            return ge::DataType::DT_FLOAT;
        case DataType_DT_DOUBLE:
            return ge::DataType::DT_DOUBLE;
        case DataType_DT_INT32:
            return ge::DataType::DT_INT32;
        case DataType_DT_UINT8:
            return ge::DataType::DT_UINT8;
        case DataType_DT_INT16:
            return ge::DataType::DT_INT16;
        case DataType_DT_INT8:
            return ge::DataType::DT_INT8;
        case DataType_DT_INT64:
            return ge::DataType::DT_INT64;
        case DataType_DT_VARIANT:
            // NOTE: variant payloads are assumed float-backed here.
            return ge::DataType::DT_FLOAT;
        default:
            break;
    }
    MNN_ASSERT(false);
    printf("cast Datatype : %d \n", src);
    return ge::DataType::DT_UNDEFINED;
}
inline std::vector<int64_t> tensorShapeFormat(const Tensor *input, const Tensor *broadCastInput=nullptr) {
auto dimSize = input->buffer().dimensions;
if(broadCastInput != nullptr) {
Expand Down
85 changes: 65 additions & 20 deletions source/backend/hiai/execution/NPUActivation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,39 +21,84 @@ NPUActivation::NPUActivation(Backend *b, const Op *op, const std::vector<Tensor
ErrorCode NPUActivation::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
mNpuBackend->setNetworkInput(inputs, mOp);
auto opName = mOp->name()->str();



auto xOp = mNpuBackend->getInputOps(mOp);

if(mType == 5){
shared_ptr<hiai::op::PRelu> prelu(new hiai::op::PRelu(opName + "_prelu"));
auto slopePtr = mOp->main_as_PRelu()->slope()->data();
auto slopeSize = mOp->main_as_PRelu()->slope()->size();

mConst_w = hiai::op::Const(opName + "_w_const");
{
ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW,
ge::DT_FLOAT); // in o h w ?
auto inputIndex = mOp->inputIndexes()->data()[0];
auto iops = mNpuBackend->mGrapMap[inputIndex];
xOp = iops.back().first;
if (mType == OpType_PReLU && mOp->main_as_PRelu()->slope() != nullptr) {
if (mOp->main_as_PRelu()->slope()->size() == 1) {
const float* slopePtr = mOp->main_as_PRelu()->slope()->data();
shared_ptr<hiai::op::Activation> relu(new hiai::op::Activation(opName + "_relu"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*relu).set_input_x(*xOp.get());
} else {
(*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*relu)
.set_attr_coef(.000000)
.set_attr_negative_slope(*slopePtr)
.set_attr_mode(mType);
mNpuBackend->setOutputOps(mOp, {relu}, outputs);
} else {
shared_ptr<hiai::op::PRelu> prelu(new hiai::op::PRelu(opName + "_prelu"));
auto slopePtr = mOp->main_as_PRelu()->slope()->data();
auto slopeSize = mOp->main_as_PRelu()->slope()->size();
mConst_w = hiai::op::Const(opName + "_w_const");
ge::TensorDesc fdesc(ge::Shape({1, slopeSize, 1, 1}), ge::FORMAT_NCHW, ge::DT_FLOAT);
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)slopePtr, slopeSize * sizeof(float));
mConst_w.set_attr_value(filter);
if (inputs[0]->buffer().dimensions < 4) {
std::vector<int32_t> shape;
for (int32_t i = 0; i < inputs[0]->buffer().dimensions; i++) {
shape.push_back(inputs[0]->buffer().dim[i].extent);
}
for (int32_t i = inputs[0]->buffer().dimensions; i < 4; i++) {
shape.push_back(1);
}
shapeConst = hiai::op::Const(opName +"_reshapeConst");
{
ge::TensorDesc fdesc(ge::Shape({static_cast<int64_t>(shape.size())}), ge::FORMAT_NCHW, ge::DT_INT32);
ge::TensorPtr filter = std::make_shared<ge::Tensor>();
filter->SetTensorDesc(fdesc);
filter->SetData((uint8_t *)shape.data(), shape.size() * sizeof(int32_t));
shapeConst.set_attr_value(filter);
}
shared_ptr<hiai::op::Reshape> reshape(new hiai::op::Reshape(opName + "_reshape"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*reshape).set_input_x(*xOp.get());
} else {
(*reshape).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*reshape).set_input_shape(shapeConst);
(*prelu).set_input_x(*reshape.get()).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {reshape, prelu}, outputs);
} else {
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*prelu).set_input_x(*xOp.get());
} else {
(*prelu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*prelu).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {prelu}, outputs);
}
}

(*prelu)
.set_input_x(*xOp.get()).set_input_weight(mConst_w);
mNpuBackend->setOutputOps(mOp, {prelu}, outputs);
}else{
float slope = 0.0;
if (mOp->type() == OpType_ReLU) {
slope = mOp->main_as_Relu()->slope();
mType = 5;
if (slope != 0.0) {
mType = 5;
}
}

shared_ptr<hiai::op::Activation> relu(new hiai::op::Activation(opName + "_relu"));
if (mNpuBackend->mSclipMap.find(inputIndex) == mNpuBackend->mSclipMap.end()) {
(*relu).set_input_x(*xOp.get());
} else {
(*relu).set_input_x(xOp->GetOutput(mNpuBackend->mSclipMap[inputIndex]));
}
(*relu)
.set_input_x(*xOp.get())
.set_attr_coef(.000000)
.set_attr_negative_slope(slope)
.set_attr_mode(mType);
Expand Down
1 change: 1 addition & 0 deletions source/backend/hiai/execution/NPUActivation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class NPUActivation : public NPUCommonExecution {
virtual ~NPUActivation() = default;
private:
hiai::op::Const mConst_w;
hiai::op::Const shapeConst;
int mType;
};

Expand Down
2 changes: 1 addition & 1 deletion source/backend/hiai/execution/NPUArgMax.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ErrorCode NPUArgMax::onResize(const std::vector<Tensor *> &inputs, const std::ve
// om input weight const op
mConst_axis = hiai::op::Const(opName + "_w_const");
{
auto aixs = axisFormat(inputs[0], argMaxParam->axis());
auto aixs = argMaxParam->axis();
ge::TensorDesc fdesc(ge::Shape({1}),ge::DT_INT32);
ge::TensorPtr axis = std::make_shared<ge::Tensor>();
axis->SetTensorDesc(fdesc);
Expand Down
93 changes: 93 additions & 0 deletions source/backend/hiai/execution/NPUBatchMatMul.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
//
// NPUBatchMatMul.cpp
// MNN
//
//  Created by MNN on 2020/10/15.
// Copyright © 2018, Alibaba Group Holding Limited
//

#include "NPUBatchMatMul.hpp"
#include "NPUBackend.hpp"

using namespace std;

namespace MNN {

// Constructor: when exactly one of the two matmul operands is a graph
// constant, pre-build a HiAI Const op (mConst) holding its data so onResize
// can wire it straight into the BatchMatMul node. If both or neither operand
// is constant, nothing is prepared here.
NPUBatchMatMul::NPUBatchMatMul(MNN::Backend *b, const MNN::Op *op, const std::vector<Tensor *> &inputs, const std::vector<MNN::Tensor *> &outputs) : NPUCommonExecution(b, op) {
    auto opName = mOp->name()->str();

    const bool constLhs = TensorUtils::getDescribe(inputs[0])->usage == Tensor::InsideDescribe::Usage::CONSTANT;
    const bool constRhs = TensorUtils::getDescribe(inputs[1])->usage == Tensor::InsideDescribe::Usage::CONSTANT;

    // Pick the single constant operand, if there is exactly one.
    Tensor* constTensor = nullptr;
    if (constLhs != constRhs) {
        constTensor = constLhs ? inputs[0] : inputs[1];
    }
    if (constTensor == nullptr) {
        return;
    }

    mConst = ge::op::Const(opName + "_w_const");
    ge::TensorPtr weight = std::make_shared<ge::Tensor>();

    // Mirror the tensor's dimensions into the HiAI shape descriptor.
    vector<int64_t> shape;
    for (int32_t axis = 0; axis < constTensor->buffer().dimensions; ++axis) {
        shape.push_back(constTensor->buffer().dim[axis].extent);
    }
    ge::TensorDesc weightDesc(ge::Shape(shape), ge::FORMAT_NCHW, ge::DT_FLOAT);
    // Only int32 and float payloads are handled; everything else is copied
    // as float (matches the original behavior — int64 is NOT special-cased).
    if (constTensor->getType().code == halide_type_int && constTensor->getType().bits == 32) {
        weightDesc.SetDataType(ge::DT_INT32);
        weight->SetData((uint8_t *)constTensor->host<int32_t>(), constTensor->elementSize() * sizeof(int32_t));
    } else {
        weight->SetData((uint8_t *)constTensor->host<float>(), constTensor->elementSize() * sizeof(float));
    }
    weight->SetTensorDesc(weightDesc);
    mConst.set_attr_value(weight);
}

// Builds the HiAI BatchMatMul node for this op. If one operand was detected
// as a graph constant in the constructor, the pre-built mConst feeds that
// input slot; otherwise both inputs come from the upstream graph ops.
// Returns NO_ERROR unconditionally (graph wiring cannot fail here).
ErrorCode NPUBatchMatMul::onResize(const std::vector<Tensor *> &inputs, const std::vector<Tensor *> &outputs) {
    mNpuBackend->setNetworkInput(inputs, mOp);
    auto opName = mOp->name()->str();
    bool isConst0 = TensorUtils::getDescribe(inputs[0])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
    bool isConst1 = TensorUtils::getDescribe(inputs[1])->usage==Tensor::InsideDescribe::Usage::CONSTANT;
    auto param = mOp->main_as_BatchMatMulParam();
    shared_ptr<hiai::op::BatchMatMul> batchMatMul(new hiai::op::BatchMatMul(opName));
    // Resolve the HiAI op feeding input slot `slot` of this MNN op
    // (the last producer registered for that tensor index wins).
    auto inputOp = [this](int slot) {
        auto inputIndex = mOp->inputIndexes()->data()[slot];
        auto iops = mNpuBackend->mGrapMap[inputIndex];
        return iops.back().first;
    };
    if (isConst0 && !isConst1) {
        (*batchMatMul)
            .set_input_x1(mConst)
            .set_input_x2(*inputOp(1).get());
    } else if (!isConst0 && isConst1) {
        (*batchMatMul)
            .set_input_x1(*inputOp(0).get())
            .set_input_x2(mConst);
    } else {
        (*batchMatMul)
            .set_input_x1(*inputOp(0).get())
            .set_input_x2(*inputOp(1).get());
    }
    // Transpose flags are common to all three wiring cases.
    (*batchMatMul)
        .set_attr_adj_x1(param->adjX())
        .set_attr_adj_x2(param->adjY());
    mNpuBackend->setOutputOps(mOp, {batchMatMul}, outputs);
    return NO_ERROR;
}

NPUCreatorRegister<TypedCreator<NPUBatchMatMul>> __BatchMatMul_op(OpType_BatchMatMul);

} // namespace MNN
Loading

0 comments on commit 24841e7

Please sign in to comment.