Add convert fp32 ONNX model to fp16 ONNX func (#1021)
* convert fp32 model to fp16

* add fp32tofp16 func

* add export_fp16_model option

* update code

* Custom op

* add node type func

* save code

* update code for custom op

* fix test

* update code

* fix ci

* save code

* save code

* update code and fix bugs when params file is big

* fix bug

* update code

* fix acc error bug

* enable fp16 in tests

* rtol delta

* delta

* update test

* update version to 1.0.6

* add keep_type_tensors

* update code

* add mv in mappers_registry
yeliang2258 committed Mar 13, 2023
1 parent b726ed3 commit 0af1092
Showing 23 changed files with 1,417 additions and 151 deletions.
3 changes: 2 additions & 1 deletion README.md
100644 → 100755
@@ -68,7 +68,8 @@ paddle2onnx --model_dir saved_inference_model \
|--deploy_backend |**[Optional]** Inference engine for deploying the quantized model; supports onnxruntime, tensorrt, or others. When others is selected, all quantization information is stored in the max_range.txt file. Defaults to onnxruntime |
|--save_calibration_file |**[Optional]** Save path of the calibration cache file that TensorRT 8.X needs to read when deploying a quantized model. Defaults to calibration.cache |
|--version |**[Optional]** Print the paddle2onnx version |
|--external_filename |**[Optional]** When the exported ONNX model is larger than 2 GB, the storage path for external data must be set; the recommended value is external_data |
|--export_fp16_model |**[Optional]** Whether to convert the exported ONNX model to FP16 and accelerate inference with ONNXRuntime-GPU. Defaults to False |

- Use onnxruntime to verify the converted model; make sure to install a recent version (1.10.0 at minimum)
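
For reference, a minimal Python sketch of the same FP16 export through the `paddle2onnx.export` API extended in this commit (the model paths are placeholders):

```python
# Minimal sketch: export a Paddle inference model straight to an FP16 ONNX
# model via the Python API, mirroring the --export_fp16_model CLI flag.
# "model.pdmodel" / "model.pdiparams" are placeholder paths.
import paddle2onnx

paddle2onnx.export(
    model_file="model.pdmodel",
    params_file="model.pdiparams",
    save_file="model_fp16.onnx",
    export_fp16_model=True)  # new in 1.0.6
```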

2 changes: 1 addition & 1 deletion VERSION_NUMBER
@@ -1 +1 @@
1.0.5
1.0.6
8 changes: 5 additions & 3 deletions paddle2onnx/__init__.py
@@ -43,19 +43,21 @@ def export(model_file,
custom_op_info=None,
deploy_backend="onnxruntime",
calibration_file="",
external_file=""):
external_file="",
export_fp16_model=False):
import paddle2onnx.paddle2onnx_cpp2py_export as c_p2o
deploy_backend = deploy_backend.lower()
if custom_op_info is None:
onnx_model_str = c_p2o.export(
model_file, params_file, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize, {},
deploy_backend, calibration_file, external_file)
deploy_backend, calibration_file, external_file, export_fp16_model)
else:
onnx_model_str = c_p2o.export(
model_file, params_file, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
custom_op_info, deploy_backend, calibration_file, external_file)
custom_op_info, deploy_backend, calibration_file, external_file,
export_fp16_model)
if save_file is not None:
with open(save_file, "wb") as f:
f.write(onnx_model_str)
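
A quick way to sanity-check the new flag is to inspect the tensor types of the saved model with the `onnx` package (a sketch; the model path is a placeholder):

```python
# Sketch: verify that an FP16-exported model actually carries FP16 weights.
import onnx
from onnx import TensorProto

model = onnx.load("model_fp16.onnx")  # placeholder path
dtypes = {init.data_type for init in model.graph.initializer}
assert TensorProto.FLOAT16 in dtypes, "no FP16 initializers found"
```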
13 changes: 10 additions & 3 deletions paddle2onnx/command.py
@@ -124,6 +124,11 @@ def arg_parser():
type=_text_type,
default=None,
help="The filename of external_data when the model is bigger than 2G.")
parser.add_argument(
"--export_fp16_model",
type=ast.literal_eval,
default=False,
help="Whether export FP16 model for ORT-GPU, default False")
return parser
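
`ast.literal_eval` is used as the argparse `type` so that the string passed on the command line is parsed into a real boolean; plain `type=bool` would treat any non-empty string as True:

```python
import ast

# "--export_fp16_model False" arrives as the string "False";
# literal_eval turns it into the boolean False.
assert ast.literal_eval("True") is True
assert ast.literal_eval("False") is False
assert bool("False") is True  # why type=bool would silently misbehave
```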


@@ -138,12 +143,13 @@ def c_paddle_to_onnx(model_file,
enable_optimize=True,
deploy_backend="onnxruntime",
calibration_file="",
external_file=""):
external_file="",
export_fp16_model=False):
import paddle2onnx.paddle2onnx_cpp2py_export as c_p2o
onnx_model_str = c_p2o.export(
model_file, params_file, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize, {},
deploy_backend, calibration_file, external_file)
deploy_backend, calibration_file, external_file, export_fp16_model)
if save_file is not None:
with open(save_file, "wb") as f:
f.write(onnx_model_str)
@@ -242,7 +248,8 @@ def main():
enable_optimize=True,
deploy_backend=args.deploy_backend,
calibration_file=calibration_file,
external_file=external_file)
external_file=external_file,
export_fp16_model=args.export_fp16_model)
logging.info("===============Make PaddlePaddle Better!================")
logging.info("A little survey: https://iwenjuan.baidu.com/?code=r8hu2s")
return
9 changes: 6 additions & 3 deletions paddle2onnx/convert.py
100644 → 100755
@@ -39,20 +39,23 @@ def dygraph2onnx(layer, save_file, input_spec=None, opset_version=9, **configs):
import paddle2onnx
import paddle
dirname = os.path.split(save_file)[0]
paddle_model_dir = os.path.join(dirname, "paddle_model_static_onnx_temp_dir")
paddle_model_dir = os.path.join(dirname,
"paddle_model_static_onnx_temp_dir")
model_file = os.path.join(paddle_model_dir, "model.pdmodel")
params_file = os.path.join(paddle_model_dir, "model.pdiparams")

if os.path.exists(paddle_model_dir):
if os.path.isfile(paddle_model_dir):
logging.info("File {} exists, will remove it.".format(paddle_model_dir))
logging.info("File {} exists, will remove it.".format(
paddle_model_dir))
os.remove(paddle_model_dir)
if os.path.isfile(model_file):
os.remove(model_file)
if os.path.isfile(params_file):
os.remove(params_file)
paddle.jit.save(layer, os.path.join(paddle_model_dir, "model"), input_spec)
logging.info("Static PaddlePaddle model saved in {}.".format(paddle_model_dir))
logging.info("Static PaddlePaddle model saved in {}.".format(
paddle_model_dir))
if not os.path.isfile(params_file):
params_file = ""

38 changes: 38 additions & 0 deletions paddle2onnx/convert_to_fp16.py
@@ -0,0 +1,38 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import

import argparse
import sys
from paddle2onnx.utils import logging


def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
'--input_model_path',
required=True,
help='The path of input onnx model file.')
parser.add_argument(
'--output_model_path',
required=True,
help='The file path to write optimized onnx model file.')
return parser.parse_args()


if __name__ == '__main__':
args = parse_arguments()
import paddle2onnx.paddle2onnx_cpp2py_export as c_p2o
c_p2o.convert_to_fp16(args.input_model_path, args.output_model_path)
logging.info("FP16 model saved in {}.".format(args.output_model_path))
75 changes: 55 additions & 20 deletions paddle2onnx/converter.cc
@@ -20,6 +20,7 @@
#include <string>

#include "paddle2onnx/mapper/exporter.h"
#include "paddle2onnx/optimizer/convert_fp32_to_fp16.h"

namespace paddle2onnx {

@@ -128,15 +129,13 @@ PADDLE2ONNX_DECL bool IsExportable(const void* model_buffer, int model_size,
return true;
}

PADDLE2ONNX_DECL bool Export(const char* model, const char* params, char** out,
int* out_size, int32_t opset_version,
bool auto_upgrade_opset, bool verbose,
bool enable_onnx_checker,
bool enable_experimental_op, bool enable_optimize,
CustomOp* ops, int op_count,
const char* deploy_backend,
char** calibration_cache, int* calibration_size,
const char* external_file, bool* save_external) {
PADDLE2ONNX_DECL bool Export(
const char* model, const char* params, char** out, int* out_size,
int32_t opset_version, bool auto_upgrade_opset, bool verbose,
bool enable_onnx_checker, bool enable_experimental_op, bool enable_optimize,
CustomOp* ops, int op_count, const char* deploy_backend,
char** calibration_cache, int* calibration_size, const char* external_file,
bool* save_external, bool export_fp16_model) {
auto parser = PaddleParser();
P2OLogger(verbose) << "Start to parsing Paddle model..." << std::endl;
if (!parser.Init(model, params)) {
@@ -159,10 +158,10 @@ PADDLE2ONNX_DECL bool Export(const char* model, const char* params, char** out,
}

std::string calibration_str;
std::string result =
me.Run(parser, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
deploy_backend, &calibration_str, external_file, save_external);
std::string result = me.Run(
parser, opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,
enable_experimental_op, enable_optimize, deploy_backend, &calibration_str,
external_file, save_external, export_fp16_model);
if (result.empty()) {
P2OLogger(verbose) << "The exported ONNX model is invalid!" << std::endl;
return false;
@@ -185,12 +184,13 @@ PADDLE2ONNX_DECL bool Export(
}

PADDLE2ONNX_DECL bool Export(
const void* model_buffer, int model_size, const void* params_buffer,
int params_size, char** out, int* out_size, int32_t opset_version,
const void* model_buffer, int64_t model_size, const void* params_buffer,
int64_t params_size, char** out, int* out_size, int32_t opset_version,
bool auto_upgrade_opset, bool verbose, bool enable_onnx_checker,
bool enable_experimental_op, bool enable_optimize, CustomOp* ops,
int op_count, const char* deploy_backend, char** calibration_cache,
int* calibration_size, const char* external_file, bool* save_external) {
int* calibration_size, const char* external_file, bool* save_external,
bool export_fp16_model) {
auto parser = PaddleParser();
P2OLogger(verbose) << "Start to parsing Paddle model..." << std::endl;
if (!parser.Init(model_buffer, model_size, params_buffer, params_size)) {
@@ -212,10 +212,10 @@
}
}
std::string calibration_str;
std::string result =
me.Run(parser, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
deploy_backend, &calibration_str, external_file, save_external);
std::string result = me.Run(
parser, opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,
enable_experimental_op, enable_optimize, deploy_backend, &calibration_str,
external_file, save_external, export_fp16_model);
if (result.empty()) {
P2OLogger(verbose) << "The exported ONNX model is invalid!" << std::endl;
return false;
@@ -237,6 +237,41 @@
return true;
}

PADDLE2ONNX_DECL bool ConvertFP32ToFP16(const char* onnx_model, int model_size,
char** out_model, int* out_model_size) {
std::string onnx_proto(onnx_model, onnx_model + model_size);
ONNX_NAMESPACE::ModelProto model;
model.ParseFromString(onnx_proto);

P2OLogger(true) << "Convert FP32 ONNX model to FP16." << std::endl;
ConvertFp32ToFp16 convert;
convert.Convert(&model);
// save external data file for big model
std::string external_data_file;
if (model.ByteSizeLong() > INT_MAX) {
external_data_file = "external_data";
}
paddle2onnx::ModelExporter me;
if (external_data_file.size()) {
me.SaveExternalData(model.mutable_graph(), external_data_file);
}
// check model
me.ONNXChecker(model, true);

std::string result;
if (!model.SerializeToString(&result)) {
P2OLogger(true)
<< "Error happenedd while optimizing the exported ONNX model."
<< std::endl;
return false;
}

*out_model_size = result.size();
*out_model = new char[*out_model_size]();
memcpy(*out_model, result.data(), *out_model_size);
return true;
}
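
For intuition, the weight-casting core of such a pass can be sketched in Python with the `onnx` package; the real C++ `ConvertFp32ToFp16` pass additionally has to update tensor type annotations, handle ops without FP16 support, and respect the `keep_type_tensors` list added in this commit:

```python
# Rough illustration (NOT the C++ implementation): cast every FP32
# initializer in an ONNX graph to FP16.
import numpy as np
import onnx
from onnx import TensorProto, numpy_helper

def cast_initializers_to_fp16(model: onnx.ModelProto) -> onnx.ModelProto:
    for init in model.graph.initializer:
        if init.data_type == TensorProto.FLOAT:
            arr = numpy_helper.to_array(init).astype(np.float16)
            init.CopyFrom(numpy_helper.from_array(arr, init.name))
    return model
```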

ModelTensorInfo::~ModelTensorInfo() {
if (shape != nullptr) {
delete[] shape;
13 changes: 9 additions & 4 deletions paddle2onnx/converter.h
100644 → 100755
@@ -56,17 +56,19 @@ PADDLE2ONNX_DECL bool Export(
CustomOp* ops = nullptr, int op_count = 0,
const char* deploy_backend = "onnxruntime",
char** calibration_cache = nullptr, int* calibration_size = 0,
const char* external_file = "", bool* save_external = nullptr);
const char* external_file = "", bool* save_external = nullptr,
bool export_fp16_model = false);

PADDLE2ONNX_DECL bool Export(
const void* model_buffer, int model_size, const void* params_buffer,
int params_size, char** out, int* out_size, int32_t opset_version = 11,
const void* model_buffer, int64_t model_size, const void* params_buffer,
int64_t params_size, char** out, int* out_size, int32_t opset_version = 11,
bool auto_upgrade_opset = true, bool verbose = false,
bool enable_onnx_checker = true, bool enable_experimental_op = false,
bool enable_optimize = true, CustomOp* ops = nullptr, int op_count = 0,
const char* deploy_backend = "onnxruntime",
char** calibration_cache = nullptr, int* calibration_size = 0,
const char* external_file = "", bool* save_external = nullptr);
const char* external_file = "", bool* save_external = nullptr,
bool export_fp16_model = false);

// The following are for internal use and may be removed later
struct PADDLE2ONNX_DECL ModelTensorInfo {
@@ -109,6 +111,9 @@ PADDLE2ONNX_DECL bool RemoveMultiClassNMS(const char* onnx_model,
int model_size, char** out_model,
int* out_model_size);

PADDLE2ONNX_DECL bool ConvertFP32ToFP16(const char* onnx_model, int model_size,
char** out_model, int* out_model_size);

struct PADDLE2ONNX_DECL PaddleReader {
PaddleReader(const char* model_buffer, int buffer_size);
// suppose the maximum number of inputs/outputs is 100
16 changes: 13 additions & 3 deletions paddle2onnx/cpp2py_export.cc
@@ -36,7 +36,8 @@ PYBIND11_MODULE(paddle2onnx_cpp2py_export, m) {
const CustomOpInfo& info = CustomOpInfo(),
const std::string& deploy_backend = "onnxruntime",
const std::string& calibration_file = "",
const std::string& external_file = "") {
const std::string& external_file = "",
const bool& export_fp16_model = false) {
P2OLogger(verbose) << "Start to parse PaddlePaddle model..." << std::endl;
P2OLogger(verbose) << "Model file path: " << model_filename << std::endl;
P2OLogger(verbose) << "Paramters file path: " << params_filename
@@ -46,11 +47,13 @@
int size = 0;
char* calibration_cache = nullptr;
int cache_size = 0;
bool save_external;
if (!Export(model_filename.c_str(), params_filename.c_str(), &out, &size,
opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
nullptr, 0, deploy_backend.c_str(), &calibration_cache,
&cache_size, external_file.c_str())) {
&cache_size, external_file.c_str(), &save_external,
export_fp16_model)) {
P2OLogger(verbose) << "Paddle model convert failed." << std::endl;
return pybind11::bytes("");
}
@@ -83,11 +86,13 @@ PYBIND11_MODULE(paddle2onnx_cpp2py_export, m) {
int size = 0;
char* calibration_cache = nullptr;
int cache_size = 0;
bool save_external;
if (!Export(model_filename.c_str(), params_filename.c_str(), &out, &size,
opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,
enable_experimental_op, enable_optimize, ops.data(),
info.size(), deploy_backend.c_str(), &calibration_cache,
&cache_size, external_file.c_str())) {
&cache_size, external_file.c_str(), &save_external,
export_fp16_model)) {
P2OLogger(verbose) << "Paddle model convert failed." << std::endl;
return pybind11::bytes("");
}
@@ -114,5 +119,10 @@
ONNX_NAMESPACE::optimization::OptimizePaddle2ONNX(
model_path, optimized_model_path, shape_infos);
});
m.def("convert_to_fp16", [](const std::string& fp32_model_path,
const std::string& fp16_model_path) {
paddle2onnx::optimization::Paddle2ONNXFP32ToFP16(fp32_model_path,
fp16_model_path);
});
}
} // namespace paddle2onnx