Add convert fp32 ONNX model to fp16 ONNX func (#1021)
* convert fp32 model to fp16

* add fp32tofp16 func

* add export_fp16_model option

* update code

* Custom op

* add node type func

* save code

* update code for custom op

* fix test

* update code

* fix ci

* save code

* save code

* update code and fix bugs when params file is big

* fix bug

* update code

* fix acc error bug

* enable fp16 in tests

* rtol delta

* delta

* update test

* update version to 1.0.6

* add keep_type_tensors

* update code

* add mv in mappers_registry
yeliang2258 committed Mar 13, 2023
1 parent b726ed3 commit 0af1092
Showing 23 changed files with 1,417 additions and 151 deletions.
3 changes: 2 additions & 1 deletion README.md
100644 → 100755
@@ -68,7 +68,8 @@ paddle2onnx --model_dir saved_inference_model \
|--deploy_backend |**[Optional]** Inference engine for deploying the quantized model; supports onnxruntime, tensorrt, or others. When others is selected, all quantization information is stored in the max_range.txt file. Defaults to onnxruntime |
|--save_calibration_file |**[Optional]** Save path of the calibration cache file that TensorRT 8.X needs to read when deploying a quantized model. Defaults to calibration.cache |
|--version |**[Optional]** Print the paddle2onnx version |
|--external_filename |**[Optional]** When the exported ONNX model is larger than 2 GB, the storage path for external data must be set; the recommended value is external_data |
|--export_fp16_model |**[Optional]** Whether to convert the exported ONNX model to FP16 and accelerate inference with ONNXRuntime-GPU. Defaults to False |

- Use onnxruntime to verify the converted model; make sure to install a recent version (1.10.0 at minimum)
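
For reference, a minimal Python sketch of the same FP16 export through the `paddle2onnx.export` API extended in this commit (the model paths are placeholders):

```python
# Minimal sketch: export a Paddle inference model straight to an FP16 ONNX
# model via the Python API, mirroring the --export_fp16_model CLI flag.
# "model.pdmodel" / "model.pdiparams" are placeholder paths.
import paddle2onnx

paddle2onnx.export(
    model_file="model.pdmodel",
    params_file="model.pdiparams",
    save_file="model_fp16.onnx",
    export_fp16_model=True)  # new in 1.0.6
```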

2 changes: 1 addition & 1 deletion VERSION_NUMBER
@@ -1 +1 @@
1.0.5
1.0.6
8 changes: 5 additions & 3 deletions paddle2onnx/__init__.py
@@ -43,19 +43,21 @@ def export(model_file,
custom_op_info=None,
deploy_backend="onnxruntime",
calibration_file="",
external_file=""):
external_file="",
export_fp16_model=False):
import paddle2onnx.paddle2onnx_cpp2py_export as c_p2o
deploy_backend = deploy_backend.lower()
if custom_op_info is None:
onnx_model_str = c_p2o.export(
model_file, params_file, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize, {},
deploy_backend, calibration_file, external_file)
deploy_backend, calibration_file, external_file, export_fp16_model)
else:
onnx_model_str = c_p2o.export(
model_file, params_file, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
custom_op_info, deploy_backend, calibration_file, external_file)
custom_op_info, deploy_backend, calibration_file, external_file,
export_fp16_model)
if save_file is not None:
with open(save_file, "wb") as f:
f.write(onnx_model_str)
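
A quick way to sanity-check the new flag is to inspect the tensor types of the saved model with the `onnx` package (a sketch; the model path is a placeholder):

```python
# Sketch: verify that an FP16-exported model actually carries FP16 weights.
import onnx
from onnx import TensorProto

model = onnx.load("model_fp16.onnx")  # placeholder path
dtypes = {init.data_type for init in model.graph.initializer}
assert TensorProto.FLOAT16 in dtypes, "no FP16 initializers found"
```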
13 changes: 10 additions & 3 deletions paddle2onnx/command.py
@@ -124,6 +124,11 @@ def arg_parser():
type=_text_type,
default=None,
help="The filename of external_data when the model is bigger than 2G.")
parser.add_argument(
"--export_fp16_model",
type=ast.literal_eval,
default=False,
help="Whether export FP16 model for ORT-GPU, default False")
return parser
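
`ast.literal_eval` is used as the argparse `type` so that the string passed on the command line is parsed into a real boolean; plain `type=bool` would treat any non-empty string as True:

```python
import ast

# "--export_fp16_model False" arrives as the string "False";
# literal_eval turns it into the boolean False.
assert ast.literal_eval("True") is True
assert ast.literal_eval("False") is False
assert bool("False") is True  # why type=bool would silently misbehave
```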


@@ -138,12 +143,13 @@ def c_paddle_to_onnx(model_file,
enable_optimize=True,
deploy_backend="onnxruntime",
calibration_file="",
external_file=""):
external_file="",
export_fp16_model=False):
import paddle2onnx.paddle2onnx_cpp2py_export as c_p2o
onnx_model_str = c_p2o.export(
model_file, params_file, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize, {},
deploy_backend, calibration_file, external_file)
deploy_backend, calibration_file, external_file, export_fp16_model)
if save_file is not None:
with open(save_file, "wb") as f:
f.write(onnx_model_str)
@@ -242,7 +248,8 @@ def main():
enable_optimize=True,
deploy_backend=args.deploy_backend,
calibration_file=calibration_file,
external_file=external_file)
external_file=external_file,
export_fp16_model=args.export_fp16_model)
logging.info("===============Make PaddlePaddle Better!================")
logging.info("A little survey: https://iwenjuan.baidu.com/?code=r8hu2s")
return
9 changes: 6 additions & 3 deletions paddle2onnx/convert.py
100644 → 100755
@@ -39,20 +39,23 @@ def dygraph2onnx(layer, save_file, input_spec=None, opset_version=9, **configs):
import paddle2onnx
import paddle
dirname = os.path.split(save_file)[0]
paddle_model_dir = os.path.join(dirname, "paddle_model_static_onnx_temp_dir")
paddle_model_dir = os.path.join(dirname,
"paddle_model_static_onnx_temp_dir")
model_file = os.path.join(paddle_model_dir, "model.pdmodel")
params_file = os.path.join(paddle_model_dir, "model.pdiparams")

if os.path.exists(paddle_model_dir):
if os.path.isfile(paddle_model_dir):
logging.info("File {} exists, will remove it.".format(paddle_model_dir))
logging.info("File {} exists, will remove it.".format(
paddle_model_dir))
os.remove(paddle_model_dir)
if os.path.isfile(model_file):
os.remove(model_file)
if os.path.isfile(params_file):
os.remove(params_file)
paddle.jit.save(layer, os.path.join(paddle_model_dir, "model"), input_spec)
logging.info("Static PaddlePaddle model saved in {}.".format(paddle_model_dir))
logging.info("Static PaddlePaddle model saved in {}.".format(
paddle_model_dir))
if not os.path.isfile(params_file):
params_file = ""

38 changes: 38 additions & 0 deletions paddle2onnx/convert_to_fp16.py
@@ -0,0 +1,38 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import

import argparse
import sys
from paddle2onnx.utils import logging


def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
'--input_model_path',
required=True,
help='The path of input onnx model file.')
parser.add_argument(
'--output_model_path',
required=True,
help='The file path to write optimized onnx model file.')
return parser.parse_args()


if __name__ == '__main__':
args = parse_arguments()
import paddle2onnx.paddle2onnx_cpp2py_export as c_p2o
c_p2o.convert_to_fp16(args.input_model_path, args.output_model_path)
logging.info("FP16 model saved in {}.".format(args.output_model_path))
75 changes: 55 additions & 20 deletions paddle2onnx/converter.cc
@@ -20,6 +20,7 @@
#include <string>

#include "paddle2onnx/mapper/exporter.h"
#include "paddle2onnx/optimizer/convert_fp32_to_fp16.h"

namespace paddle2onnx {

@@ -128,15 +129,13 @@ PADDLE2ONNX_DECL bool IsExportable(const void* model_buffer, int model_size,
return true;
}

PADDLE2ONNX_DECL bool Export(const char* model, const char* params, char** out,
int* out_size, int32_t opset_version,
bool auto_upgrade_opset, bool verbose,
bool enable_onnx_checker,
bool enable_experimental_op, bool enable_optimize,
CustomOp* ops, int op_count,
const char* deploy_backend,
char** calibration_cache, int* calibration_size,
const char* external_file, bool* save_external) {
PADDLE2ONNX_DECL bool Export(
const char* model, const char* params, char** out, int* out_size,
int32_t opset_version, bool auto_upgrade_opset, bool verbose,
bool enable_onnx_checker, bool enable_experimental_op, bool enable_optimize,
CustomOp* ops, int op_count, const char* deploy_backend,
char** calibration_cache, int* calibration_size, const char* external_file,
bool* save_external, bool export_fp16_model) {
auto parser = PaddleParser();
P2OLogger(verbose) << "Start to parsing Paddle model..." << std::endl;
if (!parser.Init(model, params)) {
@@ -159,10 +158,10 @@ PADDLE2ONNX_DECL bool Export(const char* model, const char* params, char** out,
}

std::string calibration_str;
std::string result =
me.Run(parser, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
deploy_backend, &calibration_str, external_file, save_external);
std::string result = me.Run(
parser, opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,
enable_experimental_op, enable_optimize, deploy_backend, &calibration_str,
external_file, save_external, export_fp16_model);
if (result.empty()) {
P2OLogger(verbose) << "The exported ONNX model is invalid!" << std::endl;
return false;
@@ -185,12 +184,13 @@ PADDLE2ONNX_DECL bool Export(
}

PADDLE2ONNX_DECL bool Export(
const void* model_buffer, int model_size, const void* params_buffer,
int params_size, char** out, int* out_size, int32_t opset_version,
const void* model_buffer, int64_t model_size, const void* params_buffer,
int64_t params_size, char** out, int* out_size, int32_t opset_version,
bool auto_upgrade_opset, bool verbose, bool enable_onnx_checker,
bool enable_experimental_op, bool enable_optimize, CustomOp* ops,
int op_count, const char* deploy_backend, char** calibration_cache,
int* calibration_size, const char* external_file, bool* save_external) {
int* calibration_size, const char* external_file, bool* save_external,
bool export_fp16_model) {
auto parser = PaddleParser();
P2OLogger(verbose) << "Start to parsing Paddle model..." << std::endl;
if (!parser.Init(model_buffer, model_size, params_buffer, params_size)) {
@@ -212,10 +212,10 @@
}
}
std::string calibration_str;
std::string result =
me.Run(parser, opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
deploy_backend, &calibration_str, external_file, save_external);
std::string result = me.Run(
parser, opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,
enable_experimental_op, enable_optimize, deploy_backend, &calibration_str,
external_file, save_external, export_fp16_model);
if (result.empty()) {
P2OLogger(verbose) << "The exported ONNX model is invalid!" << std::endl;
return false;
@@ -237,6 +237,41 @@
return true;
}

PADDLE2ONNX_DECL bool ConvertFP32ToFP16(const char* onnx_model, int model_size,
char** out_model, int* out_model_size) {
std::string onnx_proto(onnx_model, onnx_model + model_size);
ONNX_NAMESPACE::ModelProto model;
model.ParseFromString(onnx_proto);

P2OLogger(true) << "Convert FP32 ONNX model to FP16." << std::endl;
ConvertFp32ToFp16 convert;
convert.Convert(&model);
// save external data file for big model
std::string external_data_file;
if (model.ByteSizeLong() > INT_MAX) {
external_data_file = "external_data";
}
paddle2onnx::ModelExporter me;
if (external_data_file.size()) {
me.SaveExternalData(model.mutable_graph(), external_data_file);
}
// check model
me.ONNXChecker(model, true);

std::string result;
if (!model.SerializeToString(&result)) {
P2OLogger(true)
<< "Error happenedd while optimizing the exported ONNX model."
<< std::endl;
return false;
}

*out_model_size = result.size();
*out_model = new char[*out_model_size]();
memcpy(*out_model, result.data(), *out_model_size);
return true;
}
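
For intuition, the weight-casting core of such a pass can be sketched in Python with the `onnx` package; the real C++ `ConvertFp32ToFp16` pass additionally has to update tensor type annotations, handle ops without FP16 support, and respect the `keep_type_tensors` list added in this commit:

```python
# Rough illustration (NOT the C++ implementation): cast every FP32
# initializer in an ONNX graph to FP16.
import numpy as np
import onnx
from onnx import TensorProto, numpy_helper

def cast_initializers_to_fp16(model: onnx.ModelProto) -> onnx.ModelProto:
    for init in model.graph.initializer:
        if init.data_type == TensorProto.FLOAT:
            arr = numpy_helper.to_array(init).astype(np.float16)
            init.CopyFrom(numpy_helper.from_array(arr, init.name))
    return model
```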

ModelTensorInfo::~ModelTensorInfo() {
if (shape != nullptr) {
delete[] shape;
13 changes: 9 additions & 4 deletions paddle2onnx/converter.h
100644 → 100755
@@ -56,17 +56,19 @@ PADDLE2ONNX_DECL bool Export(
CustomOp* ops = nullptr, int op_count = 0,
const char* deploy_backend = "onnxruntime",
char** calibration_cache = nullptr, int* calibration_size = 0,
const char* external_file = "", bool* save_external = nullptr);
const char* external_file = "", bool* save_external = nullptr,
bool export_fp16_model = false);

PADDLE2ONNX_DECL bool Export(
const void* model_buffer, int model_size, const void* params_buffer,
int params_size, char** out, int* out_size, int32_t opset_version = 11,
const void* model_buffer, int64_t model_size, const void* params_buffer,
int64_t params_size, char** out, int* out_size, int32_t opset_version = 11,
bool auto_upgrade_opset = true, bool verbose = false,
bool enable_onnx_checker = true, bool enable_experimental_op = false,
bool enable_optimize = true, CustomOp* ops = nullptr, int op_count = 0,
const char* deploy_backend = "onnxruntime",
char** calibration_cache = nullptr, int* calibration_size = 0,
const char* external_file = "", bool* save_external = nullptr);
const char* external_file = "", bool* save_external = nullptr,
bool export_fp16_model = false);

// The following are for internal use and may be removed later
struct PADDLE2ONNX_DECL ModelTensorInfo {
@@ -109,6 +111,9 @@ PADDLE2ONNX_DECL bool RemoveMultiClassNMS(const char* onnx_model,
int model_size, char** out_model,
int* out_model_size);

PADDLE2ONNX_DECL bool ConvertFP32ToFP16(const char* onnx_model, int model_size,
char** out_model, int* out_model_size);

struct PADDLE2ONNX_DECL PaddleReader {
PaddleReader(const char* model_buffer, int buffer_size);
// suppose the maximum number of inputs/outputs is 100
16 changes: 13 additions & 3 deletions paddle2onnx/cpp2py_export.cc
@@ -36,7 +36,8 @@ PYBIND11_MODULE(paddle2onnx_cpp2py_export, m) {
const CustomOpInfo& info = CustomOpInfo(),
const std::string& deploy_backend = "onnxruntime",
const std::string& calibration_file = "",
const std::string& external_file = "") {
const std::string& external_file = "",
const bool& export_fp16_model = false) {
P2OLogger(verbose) << "Start to parse PaddlePaddle model..." << std::endl;
P2OLogger(verbose) << "Model file path: " << model_filename << std::endl;
P2OLogger(verbose) << "Paramters file path: " << params_filename
@@ -46,11 +47,13 @@
int size = 0;
char* calibration_cache = nullptr;
int cache_size = 0;
bool save_external;
if (!Export(model_filename.c_str(), params_filename.c_str(), &out, &size,
opset_version, auto_upgrade_opset, verbose,
enable_onnx_checker, enable_experimental_op, enable_optimize,
nullptr, 0, deploy_backend.c_str(), &calibration_cache,
&cache_size, external_file.c_str())) {
&cache_size, external_file.c_str(), &save_external,
export_fp16_model)) {
P2OLogger(verbose) << "Paddle model convert failed." << std::endl;
return pybind11::bytes("");
}
@@ -83,11 +86,13 @@ PYBIND11_MODULE(paddle2onnx_cpp2py_export, m) {
int size = 0;
char* calibration_cache = nullptr;
int cache_size = 0;
bool save_external;
if (!Export(model_filename.c_str(), params_filename.c_str(), &out, &size,
opset_version, auto_upgrade_opset, verbose, enable_onnx_checker,
enable_experimental_op, enable_optimize, ops.data(),
info.size(), deploy_backend.c_str(), &calibration_cache,
&cache_size, external_file.c_str())) {
&cache_size, external_file.c_str(), &save_external,
export_fp16_model)) {
P2OLogger(verbose) << "Paddle model convert failed." << std::endl;
return pybind11::bytes("");
}
@@ -114,5 +119,10 @@
ONNX_NAMESPACE::optimization::OptimizePaddle2ONNX(
model_path, optimized_model_path, shape_infos);
});
m.def("convert_to_fp16", [](const std::string& fp32_model_path,
const std::string& fp16_model_path) {
paddle2onnx::optimization::Paddle2ONNXFP32ToFP16(fp32_model_path,
fp16_model_path);
});
}
} // namespace paddle2onnx