Skip to content

Commit

Permalink
Feat (notebook): add example for dynamic quantization to ONNX export (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
fabianandresgrob authored Mar 6, 2024
1 parent 2fdcb42 commit 4e82c7b
Showing 1 changed file with 102 additions and 10 deletions.
112 changes: 102 additions & 10 deletions notebooks/ONNX_export_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@
" "
],
"text/plain": [
"<IPython.lib.display.IFrame at 0x7f2a6afb6f50>"
"<IPython.lib.display.IFrame at 0x7fb62ae3fe50>"
]
},
"execution_count": 4,
Expand Down Expand Up @@ -331,7 +331,7 @@
" "
],
"text/plain": [
"<IPython.lib.display.IFrame at 0x7f2a6afa9e90>"
"<IPython.lib.display.IFrame at 0x7fb734383710>"
]
},
"execution_count": 6,
Expand Down Expand Up @@ -460,7 +460,7 @@
" "
],
"text/plain": [
"<IPython.lib.display.IFrame at 0x7f2a69d7ff10>"
"<IPython.lib.display.IFrame at 0x7fb629e8a010>"
]
},
"execution_count": 8,
Expand Down Expand Up @@ -605,7 +605,7 @@
" "
],
"text/plain": [
"<IPython.lib.display.IFrame at 0x7f2a69d2e710>"
"<IPython.lib.display.IFrame at 0x7fb62ae37190>"
]
},
"execution_count": 10,
Expand Down Expand Up @@ -704,7 +704,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2024-03-01 03:24:07.215804006 [W:onnxruntime:, graph.cc:1283 Graph] Initializer linear.bias appears in graph inputs and will not be treated as constant value/weight. This may prevent some of the graph optimizations, like const folding. Move it out of graph inputs if there is no need to override it, by either re-generating the model with latest exporter/converter or with the tool onnxruntime/tools/python/remove_initializer_from_input.py.\n"
"2024-03-06 02:12:47.492497092 [W:onnxruntime:, graph.cc:1283 Graph] Initializer linear.bias appears in graph inputs and will not be treated as constant value/weight. This may prevent some of the graph optimizations, like const folding. Move it out of graph inputs if there is no need to override it, by either re-generating the model with latest exporter/converter or with the tool onnxruntime/tools/python/remove_initializer_from_input.py.\n"
]
}
],
Expand Down Expand Up @@ -842,18 +842,18 @@
},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"True\n"
"/scratch/fabian/brevitas/src/brevitas/export/onnx/standard/manager.py:26: UserWarning: ONNX opset version set to 13, override with opset_version=\n",
" warnings.warn(f\"ONNX opset version set to {DEFAULT_OPSET}, override with {ka}=\")\n"
]
},
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"/scratch/fabian/brevitas/src/brevitas/export/onnx/standard/manager.py:26: UserWarning: ONNX opset version set to 13, override with opset_version=\n",
" warnings.warn(f\"ONNX opset version set to {DEFAULT_OPSET}, override with {ka}=\")\n"
"True\n"
]
}
],
Expand Down Expand Up @@ -912,6 +912,98 @@
"\n",
"Due to differences in how the computation is performed between Brevitas and ONNX Runtime, it might happen the two results are slightly different (since Brevitas uses a style closer to QCDQ, rather than operating between integers), thus we added a tolerance for off-by-1 errors."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Export Dynamically Quantized Models to ONNX "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can also export dynamically quantized models to ONNX, but there are some limitations. The ONNX DynamicQuantizeLinear requires the following settings:\n",
"- Asymmetric quantization (and therefore *unsigned*)\n",
"- Min-max scaling\n",
"- Rounding to nearest\n",
"- Per tensor scaling\n",
"- Bit width set to 8\n",
"\n",
"This is shown in the following example:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"from brevitas_examples.common.generative.quantizers import ShiftedUint8DynamicActPerTensorFloat\n",
"\n",
"IN_CH = 3\n",
"IMG_SIZE = 128\n",
"OUT_CH = 128\n",
"BATCH_SIZE = 1\n",
"\n",
"class Model(torch.nn.Module):\n",
" def __init__(self) -> None:\n",
" super().__init__()\n",
" self.linear = qnn.QuantLinear(IN_CH, OUT_CH, bias=True, weight_bit_width=8, input_quant=ShiftedUint8DynamicActPerTensorFloat)\n",
" self.act = qnn.QuantReLU(input_quant=ShiftedUint8DynamicActPerTensorFloat)\n",
" \n",
" def forward(self, inp):\n",
" inp = self.linear(inp)\n",
" inp = self.act(inp)\n",
" return inp\n",
"\n",
"inp = torch.randn(BATCH_SIZE, IN_CH)\n",
"model = Model() \n",
"model.eval()\n",
"path = 'dynamic_quant_model_qcdq.onnx'\n",
"\n",
"exported_model = export_onnx_qcdq(model, args=inp, export_path=path, opset_version=13)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Serving 'dynamic_quant_model_qcdq.onnx' at http://localhost:8086\n"
]
},
{
"data": {
"text/html": [
"\n",
" <iframe\n",
" width=\"100%\"\n",
" height=\"400\"\n",
" src=\"http://localhost:8086/\"\n",
" frameborder=\"0\"\n",
" allowfullscreen\n",
" \n",
" ></iframe>\n",
" "
],
"text/plain": [
"<IPython.lib.display.IFrame at 0x7fb62856ccd0>"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"show_netron(\"dynamic_quant_model_qcdq.onnx\", 8086)"
]
}
],
"metadata": {
Expand Down

0 comments on commit 4e82c7b

Please sign in to comment.