[MNN:Sync] Sync Internal 2.8.1
wangzhaode committed Dec 27, 2023
1 parent 1a5609b commit 3b978d9
Showing 282 changed files with 9,504 additions and 4,740 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -489,6 +489,7 @@ IF(MNN_COREML)

IF(MNN_SEP_BUILD)
list(APPEND MNN_DEPS MNNCoreML)
list(APPEND MNN_EXTRA_DEPENDS MNNCoreML)
ELSE()
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNNCoreML>)
ENDIF()
@@ -552,6 +553,7 @@ IF(MNN_OPENCL)
IF(MNN_SEP_BUILD)
list(APPEND MNN_DEPS MNN_CL)
ELSE()
add_definitions(-DMNN_OPENCL_ENABLED=1)
list(APPEND MNN_TARGETS MNN_CL)
list(APPEND MNN_OBJECTS_TO_LINK $<TARGET_OBJECTS:MNN_CL>)
list(APPEND MNN_EXTRA_DEPENDS ${MNN_OCL_LIBS})
82 changes: 82 additions & 0 deletions MNN_Render.podspec
@@ -0,0 +1,82 @@
Pod::Spec.new do |s|
s.name = "MNN"
s.version = "2.2.0"
s.summary = "MNN"

s.description = <<-DESC
MNN is a lightweight deep neural network inference framework. It loads models and does inference on devices.
DESC

s.homepage = "https://github.com/alibaba/MNN"
s.license = {
:type => 'Apache License, Version 2.0',
:text => <<-LICENSE
Copyright © 2018, Alibaba Group Holding Limited
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
LICENSE
}

s.author = { "MNN" => "MNN@alibaba-inc.com" }
s.platform = :ios
s.ios.deployment_target = '8.0'
s.requires_arc = true

#s.source = { :git => "git@github.com:alibaba/MNN.git", :branch => 'master' }
s.source = {:git => "/Users/zhang/Development/AliNNPrivate/",:branch=> 'head'}
s.frameworks = 'Metal', 'Accelerate', 'CoreML'
s.library = 'c++'
s.source_files = \
'include/MNN/*.{h,hpp}',\
'include/MNN/expr/*.{h,hpp}',\
'schema/current/*.{h}',\
'3rd_party/flatbuffers/include/flatbuffers/*.{h}',\
'source/internal/logging/*.{hpp,cpp}',\
'source/internal/logging/ios/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/internal/logging/aliyun-log-c-sdk/src/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/core/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/common/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/utils/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/geometry/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/cv/**/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/math/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/shape/*.{h,c,m,mm,cc,hpp,cpp}',\
'source/shape/render/*.{h,c,m,mm,cc,hpp,cpp}',\
#'source/backend/arm82/*.{h,c,m,mm,cc,S,hpp,cpp}',\
#'source/backend/arm82/asm/**/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/render/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/bf16/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/arm/**/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/cpu/compute/*.{h,c,m,mm,cc,S,hpp,cpp}',\
'source/backend/metal/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/metal/render/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/coreml/backend/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/coreml/execution/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'source/backend/coreml/mlmodel/src/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'express/**/*.{hpp,cpp}',\
'tools/cv/include/**/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'tools/cv/source/imgproc/*.{h,c,m,mm,cc,hpp,cpp,metal}',\
'tools/cv/source/calib3d/*.{h,c,m,mm,cc,hpp,cpp,metal}'

s.header_mappings_dir = 'include'
s.subspec 'cv' do |sp|
sp.source_files = 'tools/cv/include/**/*.hpp'
sp.header_mappings_dir = 'tools/cv/include'
sp.xcconfig = { 'ALWAYS_SEARCH_USER_PATHS' => 'NO' }
end

s.compiler_flags = '-arch arm64 -march=armv8.2-a+simd+fp16'
s.pod_target_xcconfig = {'METAL_LIBRARY_FILE_BASE' => 'mnn', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include" "$(PODS_TARGET_SRCROOT)/3rd_party/flatbuffers/include" "$(PODS_TARGET_SRCROOT)/source" "$(PODS_TARGET_SRCROOT)/3rd_party/half" "$(PODS_TARGET_SRCROOT)/source/backend/coreml/mlmodel/include" "$(PODS_TARGET_SRCROOT)/tools/cv/include"', 'GCC_PREPROCESSOR_DEFINITIONS' => '$(inherited) MNN_CODEGEN_REGISTER=1 MNN_SUPPORT_TFLITE_QUAN=1 MNN_METAL_ENABLED=1 MNN_METAL_FULL_PRECISION=1 MNN_SUPPORT_RENDER=1 MNN_SUPPORT_BF16=1 MNN_COREML_ENABLED=1 USE_LZ4_FLAG=1 MNN_INTERNAL_ENABLED=1 MNN_USE_SPARSE_COMPUTE=1'}
s.user_target_xcconfig = { 'OTHER_LDFLAGS' => '-force_load $(BUILD_DIR)/$(CONFIGURATION)$(EFFECTIVE_PLATFORM_NAME)/MNN/libMNN.a', 'HEADER_SEARCH_PATHS' => '"$(PODS_TARGET_SRCROOT)/include"' }
end
6 changes: 1 addition & 5 deletions docs/compile/tools.md
@@ -55,14 +55,10 @@
- `checkInvalidValue.out` checks the data in an output directory for invalid values
- `timeProfile.out` measures a model's execution time on the specified backend and reports each layer's share of the total time
- `testTrain.out` tests the training functionality
- `aoa_nlu_encoder.out` tests the NLU encoder
- `aoa_nlu_decoder1.out` tests NLU decoder 1
- `aoa_nlu_decoder2.out` tests NLU decoder 2
- `checkDir.out` checks whether two directories are identical
- `checkFile.out` checks whether two files are identical
- `winogradExample.out` Winograd example
- `winogradGenerateGLSL.out` generates GLSL for Winograd
- `winogradGenerateCL.out` generates OpenCL for Winograd
- `fuseTest` tests custom GPU operators; currently only the Vulkan buffer mode is supported
## Benchmark tool
- Related build options
- `MNN_BUILD_BENCHMARK` whether to build the benchmark tool
19 changes: 19 additions & 0 deletions docs/pymnn/expr.md
@@ -2195,6 +2195,25 @@ array([[[[0., 1.]],
[[6., 7.]]]], dtype=float32)
```

---
### `reverse(x, axis)`
Reverses the input variable x along dimension axis[0]

Parameters:
- `x : var_like` the input variable
- `axis : var_like` the axis along which to reverse

Returns: the variable with its sequence reversed

Return type: `Var`

Example:

```python
>>> expr.reverse(expr.range(-4., 4., 1.), [0])
array([ 3., 2., 1., 0., -1., -2., -3., -4.], dtype=float32)
```

---
### `reverse_sequence(x, y, batch_dim, seq_dim)`
Slices x along dimension batch_dim and, for each slice i, reverses the first y[i] elements along dimension seq_dim
11 changes: 11 additions & 0 deletions docs/tools/test.md
@@ -457,3 +457,14 @@ Matrix:
0.0000000 0.0000000 1.0000000
```

## fuseTest
### Function
Tests custom GPU operators; currently only the Vulkan buffer mode is supported

### Parameters
`Usage: ./fuseTest user.spirv config.json`
- `user.spirv:str`: path to the SPIR-V file, which can be compiled with `glslangValidator -V user.comp -o user.spirv`
- `config.json:str`: path to the configuration file
### Example
```bash
$ ./fuseTest user.spirv user.json
```
2 changes: 1 addition & 1 deletion express/Executor.cpp
@@ -120,7 +120,7 @@ Executor::Requirement Executor::getRequirement(Expr* expr) const {
return req;
}
for (int i = 0; i < inputSize; ++i) {
req.contentNeedContent[i] = OpCommonUtils::opNeedContent(op->type(), i);
req.contentNeedContent[i] = OpCommonUtils::opNeedContent(op, i);
req.shapeNeedContent[i] = false;
}
auto needIndexId = SizeComputer::needInputContent(op, inputSize);
11 changes: 11 additions & 0 deletions express/Expr.cpp
@@ -192,6 +192,17 @@ EXPRP Expr::create(std::shared_ptr<BufferStorage> extra, std::vector<VARP>&& inp
EXPRP expr(new Expr(outputSize));
expr->mStorage = extra;
expr->mOp = flatbuffers::GetRoot<Op>(extra->buffer());
switch (expr->mOp->type()) {
case OpType_Const:
expr->mType = VARP::CONSTANT;
break;
case OpType_TrainableParam:
expr->mType = VARP::TRAINABLE;
break;
default:
expr->mType = VARP::INPUT;
break;
}
expr->mInputs = std::move(inputs);
auto exe = ExecutorScope::Current();
expr->mInside->mReq = exe->getRequirement(expr.get());
31 changes: 10 additions & 21 deletions express/NeuralNetWorkOp.cpp
@@ -626,6 +626,13 @@ VARP _ChannelShuffle(VARP x, int group) {
x = _Convert(x, NC4HW4);
return x;
}

VARP _Reverse(VARP x, VARP axis) {
std::unique_ptr<MNN::OpT> op(new MNN::OpT);
op->type = MNN::OpType_Reverse;
return (Variable::create(Expr::create(op.get(), {x, axis})));
}

VARP _ReverseSequence(VARP x, VARP y, int batchDim, int seqDim) {
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_ReverseSequence;
@@ -1710,19 +1717,10 @@ VARP _GridSample(VARP input, VARP grid, InterpolationMethod mode, GridSamplePadd
}

VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue/*For future*/) {
auto xInfo = x->getInfo();
auto scaleInfo = scale->getInfo();
auto scalePtr = scale->readMap<float>();
if (nullptr == scalePtr || nullptr == xInfo || nullptr == scaleInfo) {
MNN_ERROR("Error for FloatToInt8 because var not ready\n");
return nullptr;
}
if (xInfo->order != NC4HW4 || xInfo->type.code != halide_type_float) {
MNN_ERROR("Not Support Input for FloatToInt8 because var not NC4HW4 or not float\n");
return nullptr;
}
if ((scaleInfo->size != xInfo->dim[1]) && (scaleInfo->size != 1)) {
MNN_ERROR("Scale's size not match input's channel: %d - %d\n", scaleInfo->size, xInfo->dim[1]);
if (nullptr == scalePtr || nullptr == scaleInfo) {
MNN_ERROR("Error for FloatToInt8 because scale not ready\n");
return nullptr;
}
std::unique_ptr<OpT> op(new OpT);
@@ -1735,21 +1733,12 @@ VARP _FloatToInt8(VARP x, VARP scale, char minValue/*For future*/, char maxValue
}

VARP _FloatToInt8(VARP x, VARP scale, int8_t minValue, int8_t maxValue, int8_t zeroPoint) {
auto xInfo = x->getInfo();
auto scaleInfo = scale->getInfo();
auto scalePtr = scale->readMap<float>();
if (nullptr == scalePtr || nullptr == xInfo || nullptr == scaleInfo) {
if (nullptr == scalePtr || nullptr == scaleInfo) {
MNN_ERROR("Error for FloatToInt8 because var not ready\n");
return nullptr;
}
if (xInfo->order != NC4HW4 || xInfo->type.code != halide_type_float) {
MNN_ERROR("Not Support Input for FloatToInt8 because var not NC4HW4 or not float\n");
return nullptr;
}
if ((scaleInfo->size != xInfo->dim[1]) && (scaleInfo->size != 1)) {
MNN_ERROR("Scale's size not match input's channel: %d - %d\n", scaleInfo->size, xInfo->dim[1]);
return nullptr;
}
std::unique_ptr<OpT> op(new OpT);
op->type = OpType_FloatToInt8;
op->main.type = OpParameter_QuantizedFloatParam;
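The new `_Reverse` entry point mirrors the Python `expr.reverse` documented above. Below is a minimal C++ sketch, not part of the commit, of how it might be called; it assumes the existing MNN Express helpers `_Const`, `_Scalar`, and `readMap`.

```cpp
// Hypothetical usage sketch for the new _Reverse API; mirrors the Python
// example from docs/pymnn/expr.md. Not taken from this commit.
#include <cstdio>
#include <MNN/expr/ExprCreator.hpp>

using namespace MNN::Express;

int main() {
    float data[] = {-4.f, -3.f, -2.f, -1.f, 0.f, 1.f, 2.f, 3.f};
    auto x    = _Const(data, {8}, NHWC, halide_type_of<float>());
    auto axis = _Scalar<int32_t>(0);     // reverse along dimension 0
    auto y    = _Reverse(x, axis);
    auto ptr  = y->readMap<float>();     // expected: 3 2 1 0 -1 -2 -3 -4
    for (int i = 0; i < 8; ++i) {
        printf("%.1f ", ptr[i]);
    }
    printf("\n");
    return 0;
}
```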
29 changes: 29 additions & 0 deletions express/module/PipelineModule.cpp
@@ -58,6 +58,10 @@ ExprModule::ExprModule(EXPRP expr) {
break;
}
}
// TODO: Optimize the logic
if (!mExpr->mCanDecompose) {
ExecutorScope::Current()->setLazyComputeMode(Executor::LAZY_CONTENT);
}
}

std::vector<VARP> ExprModule::onForward(const std::vector<VARP>& inputs) {
@@ -72,6 +76,14 @@ std::vector<VARP> ExprModule::onForward(const std::vector<VARP>& inputs) {
std::vector<VARP> outputVars;
auto newExpr = Expr::create(mExpr->extra(), std::move(tempInputs), mExpr->outputSize());
newExpr->setName(mExpr->name());
if (!mExpr->mCanDecompose) {
// Set tensor shape from net
newExpr->mCanDecompose = false;
for (int index = 0; index < mExpr->outputSize(); ++index) {
TensorUtils::copyShape(mExpr->inside()->mOutputTensors[index], newExpr->inside()->mOutputTensors[index], true, true);
Utils::copyTensorToInfo(newExpr->inside()->mOutputInfos.data() + index, newExpr->inside()->mOutputTensors[index]);
}
}
for (int i = 0; i < mExpr->outputSize(); ++i) {
outputVars.emplace_back(Variable::create(newExpr, i));
}
@@ -562,6 +574,23 @@ Module* PipelineModule::load(const std::vector<std::string>& inputs, const std::
config = &defaultConfig;
}
auto subGraphs = net->subgraphs();
if (config->dynamic) {
// TODO: Support subgraph
if (nullptr == subGraphs) {
auto varMap = MNN::Express::Variable::loadMap(buffer, length);
std::vector<MNN::Express::VARP> inputsVar(inputs.size());
for (int i=0; i<inputs.size(); ++i) {
inputsVar[i] = varMap[inputs[i]];
}
std::vector<MNN::Express::VARP> outputsVar(outputs.size());
for (int i=0; i<outputs.size(); ++i) {
outputsVar[i] = varMap[outputs[i]];
}
return extract(inputsVar, outputsVar, false);
} else {
MNN_ERROR("Don't support subgraph for dynamic load, turn back to static load\n");
}
}
std::map<std::string, SubGraph> subGraphMap;
_createSubGraph(net, rtMgr, config, subGraphMap);
std::shared_ptr<BufferStorage> bufferStorage(new BufferStorage);
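For reference, a hedged sketch of how the new dynamic-load branch above might be driven from user code. The `dynamic` flag is taken from the diff; the model path and tensor names are placeholders.

```cpp
// Hedged sketch of using the dynamic-load path added above.
// "model.mnn", "input", and "output" are placeholder names.
#include <memory>
#include <MNN/expr/Module.hpp>

using namespace MNN::Express;

int main() {
    Module::Config config;
    config.dynamic = true; // take the Variable::loadMap + extract path; falls
                           // back to static load if the model has subgraphs
    std::unique_ptr<Module> net(
        Module::load({"input"}, {"output"}, "model.mnn", &config));
    return net == nullptr ? 1 : 0;
}
```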
2 changes: 1 addition & 1 deletion include/MNN/MNNDefine.h
@@ -69,6 +69,6 @@ MNN_ERROR("Check failed: %s ==> %s\n", #success, #log); \
#define STR(x) STR_IMP(x)
#define MNN_VERSION_MAJOR 2
#define MNN_VERSION_MINOR 8
#define MNN_VERSION_PATCH 0
#define MNN_VERSION_PATCH 1
#define MNN_VERSION STR(MNN_VERSION_MAJOR) "." STR(MNN_VERSION_MINOR) "." STR(MNN_VERSION_PATCH)
#endif /* MNNDefine_h */
12 changes: 12 additions & 0 deletions include/MNN/MNNSharedContext.h
@@ -24,6 +24,15 @@ struct MNNVulkanContext {
uint32_t iQueueFamilyIndex;
};

struct MNNVulkanTensorContent {
VkBuffer buffer;
VkDeviceSize size;
VkDeviceSize offset;

halide_type_t realType;
int32_t mask; // For future usage
};

#endif

#ifdef MNN_METAL
@@ -36,6 +45,9 @@ struct MNNMetalTensorContent {
id<MTLBuffer> buffer;
int32_t offset;
id<MTLTexture> texture;

halide_type_t type;
int32_t mask;
int32_t forFuture[8];
};

6 changes: 6 additions & 0 deletions include/MNN/Tensor.hpp
@@ -275,6 +275,12 @@ class MNN_PUBLIC Tensor {
mBuffer.dim[index].extent = length;
}

/**
* @brief For GPU and other devices, get the device memory directly; see MNNSharedContext for details.
* @return Success or not. Returns false if forwardType does not match the tensor's backend type, or if the backend is CPU.
*/
bool getDeviceInfo(void* dst, int forwardType) const;

public:
/**
* @brief print tensor data. for DEBUG use only.
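Taken together with the `MNNVulkanTensorContent` struct added in MNNSharedContext.h, here is a hedged sketch of how `getDeviceInfo` might be used. The error handling and the use of `MNN_FORWARD_VULKAN` are assumptions, not taken from this commit, and the code requires building with `MNN_VULKAN` defined.

```cpp
// Hedged sketch: querying the Vulkan buffer behind a device tensor via the
// new Tensor::getDeviceInfo. Assumes the tensor lives on a Vulkan backend.
#include <MNN/Tensor.hpp>
#include <MNN/MNNForwardType.h>
#include <MNN/MNNSharedContext.h> // needs MNN_VULKAN defined

bool readVulkanBuffer(const MNN::Tensor* tensor) {
    MNNVulkanTensorContent content;
    // Returns false when the tensor's backend is CPU or not Vulkan.
    if (!tensor->getDeviceInfo(&content, MNN_FORWARD_VULKAN)) {
        return false;
    }
    // content.buffer / content.offset / content.size now describe the
    // VkBuffer slice backing the tensor; content.realType is its element type.
    return true;
}
```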
1 change: 1 addition & 0 deletions include/MNN/expr/Expr.hpp
@@ -267,6 +267,7 @@ class MNN_PUBLIC Expr {
bool mVisited = false;
std::vector<WeakEXPRP> mTo;
bool mCanDecompose = true;
friend class ExprModule;

};
} // namespace Express
1 change: 1 addition & 0 deletions include/MNN/expr/NeuralNetWorkOp.hpp
@@ -77,6 +77,7 @@ MNN_PUBLIC VARP _ChangeInputFormat(VARP input, Dimensionformat format);
MNN_PUBLIC VARP _Conv2DBackPropFilter(VARP input, VARP inputGrad, INTS kernelSize, PaddingMode pad = VALID, INTS stride = {1, 1}, INTS dilate = {1, 1}, int group = 1, INTS pads = {0, 0});
MNN_PUBLIC VARP _PoolGrad(VARP originInput, VARP originOutput, VARP inputGrad, INTS kernel, INTS stride, PoolingMode type, PaddingMode pad = VALID, INTS pads= {0, 0});
// FIXME: move the api to Array Ops
MNN_PUBLIC VARP _Reverse(VARP x, VARP axis);
MNN_PUBLIC VARP _ReverseSequence(VARP x, VARP y, int batchDim, int seqDim);
// FIXME: move the api to Image Ops
MNN_PUBLIC VARP _Crop(VARP images, VARP size, int axis, INTS offset);