From 8dc755a359459047a26719bd86abae7c4d2f5753 Mon Sep 17 00:00:00 2001
From: tripleMu <gpu@163.com>
Date: Tue, 9 Aug 2022 12:26:05 +0800
Subject: [PATCH] Add grid concat output and fuse box-decoding operators (#389)

* Add grid concat and fuse many ops

* Fix model

* Fix other detector

* Update yolo.py

* Update yolo.py

Co-authored-by: Alexey <AlexeyAB@users.noreply.github.com>
---
 export.py      | 19 +++++++++++--------
 models/yolo.py | 34 +++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 19 deletions(-)

diff --git a/export.py b/export.py
index 467d71da69..0fba54142d 100644
--- a/export.py
+++ b/export.py
@@ -143,15 +143,18 @@
                     'output': {0: 'batch'},
                 }
             dynamic_axes.update(output_axes)
-        if opt.grid and opt.end2end:
-            print('\nStarting export end2end onnx model for %s...' % 'TensorRT' if opt.max_wh is None else 'onnxruntime')
-            model = End2End(model,opt.topk_all,opt.iou_thres,opt.conf_thres,opt.max_wh,device)
-            if opt.end2end and opt.max_wh is None:
-                output_names = ['num_dets', 'det_boxes', 'det_scores', 'det_classes']
-                shapes = [opt.batch_size, 1, opt.batch_size, opt.topk_all, 4,
-                          opt.batch_size, opt.topk_all, opt.batch_size, opt.topk_all]
+        if opt.grid:
+            if opt.end2end:
+                print('\nStarting export end2end onnx model for %s...' % 'TensorRT' if opt.max_wh is None else 'onnxruntime')
+                model = End2End(model,opt.topk_all,opt.iou_thres,opt.conf_thres,opt.max_wh,device)
+                if opt.end2end and opt.max_wh is None:
+                    output_names = ['num_dets', 'det_boxes', 'det_scores', 'det_classes']
+                    shapes = [opt.batch_size, 1, opt.batch_size, opt.topk_all, 4,
+                              opt.batch_size, opt.topk_all, opt.batch_size, opt.topk_all]
+                else:
+                    output_names = ['output']
             else:
-                output_names = ['output']
+                model.model[-1].concat = True
 
         torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                           output_names=output_names,
diff --git a/models/yolo.py b/models/yolo.py
index ee5e5a7e87..95a019c6ae 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -24,7 +24,8 @@ class Detect(nn.Module):
     stride = None  # strides computed during build
     export = False  # onnx export
     end2end = False
-    include_nms = False 
+    include_nms = False
+    concat = False
 
     def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
         super(Detect, self).__init__()
@@ -55,9 +56,10 @@ def forward(self, x):
                     y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:
-                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
-                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].data  # wh
-                    y = torch.cat((xy, wh, y[..., 4:]), -1)
+                    xy, wh, conf = y.split((2, 2, self.nc + 1), 4)  # y.tensor_split((2, 4, 5), 4)  # torch 1.8.0
+                    xy = xy * (2. * self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5))  # new xy
+                    wh = wh ** 2 * (4 * self.anchor_grid[i].data)  # new wh
+                    y = torch.cat((xy, wh, conf), 4)
                 z.append(y.view(bs, -1, self.no))
 
         if self.training:
@@ -67,6 +69,8 @@ def forward(self, x):
         elif self.include_nms:
             z = self.convert(z)
             out = (z, )
+        elif self.concat:
+            out = torch.cat(z, 1)
         else:
             out = (torch.cat(z, 1), x)
 
@@ -94,7 +98,8 @@ class IDetect(nn.Module):
     stride = None  # strides computed during build
     export = False  # onnx export
     end2end = False
-    include_nms = False 
+    include_nms = False
+    concat = False
 
     def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
         super(IDetect, self).__init__()
@@ -150,9 +155,10 @@ def fuseforward(self, x):
                     y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:
-                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
-                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].data  # wh
-                    y = torch.cat((xy, wh, y[..., 4:]), -1)
+                    xy, wh, conf = y.split((2, 2, self.nc + 1), 4)  # y.tensor_split((2, 4, 5), 4)  # torch 1.8.0
+                    xy = xy * (2. * self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5))  # new xy
+                    wh = wh ** 2 * (4 * self.anchor_grid[i].data)  # new wh
+                    y = torch.cat((xy, wh, conf), 4)
                 z.append(y.view(bs, -1, self.no))
 
         if self.training:
@@ -162,6 +168,8 @@ def fuseforward(self, x):
         elif self.include_nms:
             z = self.convert(z)
             out = (z, )
+        elif self.concat:
+            out = torch.cat(z, 1)            
         else:
             out = (torch.cat(z, 1), x)
 
@@ -305,6 +313,7 @@ class IAuxDetect(nn.Module):
     export = False  # onnx export
     end2end = False
     include_nms = False
+    concat = False
 
     def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
         super(IAuxDetect, self).__init__()
@@ -344,9 +353,10 @@ def forward(self, x):
                     y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:
-                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
-                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].data  # wh
-                    y = torch.cat((xy, wh, y[..., 4:]), -1)
+                    xy, wh, conf = y.split((2, 2, self.nc + 1), 4)  # y.tensor_split((2, 4, 5), 4)  # torch 1.8.0
+                    xy = xy * (2. * self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5))  # new xy
+                    wh = wh ** 2 * (4 * self.anchor_grid[i].data)  # new wh
+                    y = torch.cat((xy, wh, conf), 4)
                 z.append(y.view(bs, -1, self.no))
 
         return x if self.training else (torch.cat(z, 1), x[:self.nl])
@@ -381,6 +391,8 @@ def fuseforward(self, x):
         elif self.include_nms:
             z = self.convert(z)
             out = (z, )
+        elif self.concat:
+            out = torch.cat(z, 1)            
         else:
             out = (torch.cat(z, 1), x)