Skip to content

Commit

Permalink
Merge pull request #2450 from wangzhaode/feature/sync_low_memory
Browse files Browse the repository at this point in the history
[MNN:Sync] Sync Internal code, support low_memory for conv.
  • Loading branch information
wangzhaode committed Jun 27, 2023
2 parents 551974a + 67eceb8 commit 24a2e4e
Show file tree
Hide file tree
Showing 57 changed files with 8,566 additions and 226 deletions.
8 changes: 5 additions & 3 deletions benchmark/benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,12 @@ std::vector<float> doBench(Model& model, int loop, int warmup = 10, int forward
int numberThread = 4, int precision = 2, float sparsity = 0.0f, int sparseBlockOC = 1, bool testQuantModel=false) {
auto revertor = std::unique_ptr<Revert>(new Revert(model.model_file.c_str()));
if (testQuantModel) {
float scale = 0.003, offset = 0.f;
revertor->writeExtraDescribeTensor(&scale, &offset);
printf("Auto set sparsity=0 when test quantized model in benchmark...\n");
revertor->initialize(0, sparseBlockOC, false, true);
} else {
revertor->initialize(sparsity, sparseBlockOC);
}
revertor->initialize(sparsity, sparseBlockOC);

auto modelBuffer = revertor->getBuffer();
const auto bufferSize = revertor->getBufferSize();
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromBuffer(modelBuffer, bufferSize));
Expand Down
1 change: 1 addition & 0 deletions docs/compile/cmake.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,4 @@ MNN使用CMake构建项目,CMake中的宏定义列表如下:
| MNN_OPENCV_TEST | 构建MNN的OpenCV功能是否开启单元测试,默认为`OFF` |
| MNN_OPENCV_BENCH | 构建MNN的OpenCV功能是否开启性能benchmark,默认为`OFF` |
| MNN_VULKAN_IMAGE | 构建MNN的Vulkan后端时采用Image内存模式,以便支持FP16和部分移动端上GPU的加速,默认为`ON` |
| MNN_LOW_MEMORY | 是否支持低内存模式,开启后若使用权值量化模型并设置`low_memory`,则在计算时进行反量化,默认为`OFF` |
2 changes: 1 addition & 1 deletion source/backend/arm82/Arm82Backend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ bool Arm82Backend::addArm82Creator(OpType t, Arm82Creator* ct) {
return true;
}

Arm82Backend::Arm82Backend(const CPURuntime* runtime) : CPUBackend(runtime, BackendConfig::Precision_Low, BackendConfig::Memory_Normal, MNN_FORWARD_CPU_EXTENSION) {
// Constructs the Arm82 backend on top of CPUBackend.
// runtime: CPU runtime handed through to the CPUBackend base.
// memory:  memory mode chosen by the caller; forwarded unchanged to CPUBackend.
// Precision is always BackendConfig::Precision_Low and the forward type is
// always MNN_FORWARD_CPU_EXTENSION for this backend.
Arm82Backend::Arm82Backend(const CPURuntime* runtime, BackendConfig::MemoryMode memory) : CPUBackend(runtime, BackendConfig::Precision_Low, memory, MNN_FORWARD_CPU_EXTENSION) {
// Use the core functions supplied by Arm82Functions instead of the base defaults.
mCoreFunctions = Arm82Functions::get();
}

Expand Down
2 changes: 1 addition & 1 deletion source/backend/arm82/Arm82Backend.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace MNN {
class Arm82Backend : public CPUBackend {
public:
virtual ~Arm82Backend();
Arm82Backend(const CPURuntime* runtime);
Arm82Backend(const CPURuntime* runtime, BackendConfig::MemoryMode memory);
virtual Execution* onCreate(const std::vector<Tensor*>& inputs, const std::vector<Tensor*>& outputs,
const MNN::Op* op) override;
virtual Backend::MemObj* onAcquire(const Tensor* nativeTensor, StorageType storageType) override;
Expand Down
15 changes: 14 additions & 1 deletion source/backend/arm82/Arm82Functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,14 @@ void MNNPackedMatMulFP16(float* C, const float* A, const float* B, const size_t*

// C(UP_DIV(h,8), e, h8) = B(UP_DIV(h,hP), l, hP) * A(l, e), hP = 24, e >= 1
// parameter: [aStride, l, h, cStride, bExtraStride]
void MNNPackedMatMulRemainFP16(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias);
void MNNPackedMatMulRemainFP16(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b);

// Packed matmul kernel prototypes that are only declared when MNN_LOW_MEMORY
// is defined. Each takes two extra pointers, k and b, after bias.
// NOTE(review): the _int4/_int8 suffixes presumably refer to the bit width of
// the quantized weights in B, and k/b presumably are the dequantization
// scale/offset applied during computation — confirm against the kernel
// implementations.
#ifdef MNN_LOW_MEMORY
void MNNPackedMatMulFP16_int4(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b);
void MNNPackedMatMulRemainFP16_int4(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b);
void MNNPackedMatMulFP16_int8(float* C, const float* A, const float* B, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b);
void MNNPackedMatMulRemainFP16_int8(float* C, const float* A, const float* B, size_t eSize, const size_t* parameter, const float* postParameters, const float* bias, const float* k, const float* b);
#endif

void MNNConvDwF23MulTransUnitFP16(FLOAT16 **cacheLine, const FLOAT16 *weight, FLOAT16 *dest, size_t ow);

Expand Down Expand Up @@ -700,6 +707,12 @@ bool Arm82Functions::init() {
// MatMul
FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMul, MNNPackedMatMulFP16);
FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMulRemain, MNNPackedMatMulRemainFP16);
#ifdef MNN_LOW_MEMORY
FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMul_int4, MNNPackedMatMulFP16_int4);
FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMulRemain_int4, MNNPackedMatMulRemainFP16_int4);
FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMul_int8, MNNPackedMatMulFP16_int8);
FUNC_PTR_ASSIGN(gInstance->MNNPackedMatMulRemain_int8, MNNPackedMatMulRemainFP16_int8);
#endif
FUNC_PTR_ASSIGN(gInstance->MNNPackC4ForMatMul_A, Arm82MNNPackForMatMul_A);
FUNC_PTR_ASSIGN(gInstance->MNNGetMatMulPackMode, Arm82MNNGetMatMulPackMode);
FUNC_PTR_ASSIGN(gInstance->MNNPackForMatMul_B, Arm82MNNPackForMatMul_B);
Expand Down
6 changes: 6 additions & 0 deletions source/backend/arm82/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^armv7" OR ARCHS MATCHES "^armv7(;armv7s)?")
target_compile_options(MNN_Arm82 PRIVATE -march=armv8.2-a+fp16 -mfpu=neon-fp-armv8 -mfloat-abi=softfp -DENABLE_ARMV82)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64" OR ARCHS STREQUAL "arm64")
file(GLOB MNN_ARM82_SRCS_ASM "${CMAKE_CURRENT_LIST_DIR}/asm/arm64/*")
if (MNN_LOW_MEMORY)
file(GLOB MNN_ARM82_SRCS_ASM ${MNN_ARM82_SRCS_ASM} ${CMAKE_CURRENT_LIST_DIR}/asm/arm64/low_memory/*)
endif()
add_library(MNN_Arm82 OBJECT ${MNN_ARM82_SRCS} ${MNN_ARM82_SRCS_ASM})
if (MNN_LOW_MEMORY)
target_compile_options(MNN_Arm82 PRIVATE -DMNN_LOW_MEMORY)
endif()
target_compile_options(MNN_Arm82 PRIVATE -march=armv8.2-a+fp16 -DENABLE_ARMV82)
else()
# Building fat binary requires multiple separate builds and lipo-by-hand under CMake's design
Expand Down
Loading

0 comments on commit 24a2e4e

Please sign in to comment.