separate PPYOLOE architecture from YOLOv3 (#7634)

* add ppyoloe architectures * fix deploy ppyoloe arch * add ppyoloe arch comments, test=document_fix

separate PPYOLOE architecture from YOLOv3 (#7634)
* add ppyoloe architectures * fix deploy ppyoloe arch * add ppyoloe arch comments, test=document_fix
b7a6bb66 · Feng Ni · GitHub · c61c68d4 · b7a6bb66 · b7a6bb66
9 changed files
--- a/deploy/pptracking/python/det_infer.py
+++ b/deploy/pptracking/python/det_infer.py
@@ -39,6 +39,7 @@ from mot_utils import argsparser, Timer, get_current_memory_mb
 # Global dictionary
 SUPPORT_MODELS = {
    'YOLO',
+    'PPYOLOE',
    'PicoDet',
    'JDE',
    'FairMOT',

--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -40,10 +40,10 @@ from utils import argsparser, Timer, get_current_memory_mb, multiclass_nms, coco
 # Global dictionary
 SUPPORT_MODELS = {
-    'YOLO', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet', 'S2ANet', 'JDE',
+    'YOLO', 'PPYOLOE', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet',
-    'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet', 'TOOD', 'RetinaNet',
+    'S2ANet', 'JDE', 'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet',
-    'StrongBaseline', 'STGCN', 'YOLOX', 'YOLOF', 'PPHGNet', 'PPLCNet', 'DETR',
+    'TOOD', 'RetinaNet', 'StrongBaseline', 'STGCN', 'YOLOX', 'YOLOF', 'PPHGNet',
-    'CenterTrack'
+    'PPLCNet', 'DETR', 'CenterTrack'
 }
 TUNED_TRT_DYNAMIC_MODELS = {'DETR'}

--- a/deploy/serving/python/web_service.py
+++ b/deploy/serving/python/web_service.py
@@ -30,9 +30,9 @@ import yaml
 # Global dictionary
 SUPPORT_MODELS = {
-    'YOLO', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet', 'S2ANet', 'JDE',
+    'YOLO', 'PPYOLOE', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet',
-    'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet', 'TOOD', 'RetinaNet',
+    'S2ANet', 'JDE', 'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet',
-    'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet'
+    'TOOD', 'RetinaNet', 'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet'
 }
 GLOBAL_VAR = {}

--- a/deploy/third_engine/demo_onnx_trt/trt_infer.py
+++ b/deploy/third_engine/demo_onnx_trt/trt_infer.py
@@ -51,9 +51,9 @@ TRT_LOGGER = trt.Logger()
 trt.init_libnvinfer_plugins(TRT_LOGGER, namespace="")
 # Global dictionary
 SUPPORT_MODELS = {
-    'YOLO', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet', 'S2ANet', 'JDE',
+    'YOLO', 'PPYOLOE', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet',
-    'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet', 'TOOD', 'RetinaNet',
+    'S2ANet', 'JDE', 'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet',
-    'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet'
+    'TOOD', 'RetinaNet', 'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet'
 }
@@ -205,8 +205,8 @@ def create_trt_bindings(engine, context):
            "is_input": True if engine.binding_is_input(name) else False
        }
        if engine.binding_is_input(name):
-            bindings[name]['cpu_data'] = np.random.randn(
+            bindings[name]['cpu_data'] = np.random.randn(*shape).astype(
-                *shape).astype(np.float32)
+                np.float32)
            bindings[name]['cuda_ptr'] = cuda.mem_alloc(bindings[name][
                'cpu_data'].nbytes)
        else:

--- a/deploy/third_engine/onnx/infer.py
+++ b/deploy/third_engine/onnx/infer.py
@@ -23,9 +23,9 @@ from preprocess import Compose
 # Global dictionary
 SUPPORT_MODELS = {
-    'YOLO', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet', 'S2ANet', 'JDE',
+    'YOLO', 'PPYOLOE', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet',
-    'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet', 'TOOD', 'RetinaNet',
+    'S2ANet', 'JDE', 'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet',
-    'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet'
+    'TOOD', 'RetinaNet', 'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet'
 }
 parser = argparse.ArgumentParser(description=__doc__)

--- a/ppdet/engine/export_utils.py
+++ b/ppdet/engine/export_utils.py
@@ -29,6 +29,7 @@ logger = setup_logger('ppdet.engine')
 # Global dictionary
 TRT_MIN_SUBGRAPH = {
    'YOLO': 3,
+    'PPYOLOE': 3,
    'SSD': 60,
    'RCNN': 40,
    'RetinaNet': 40,
@@ -193,7 +194,7 @@ def _dump_infer_config(config, path, image_shape, model):
            arch_state = True
            break
-    if infer_arch in ['YOLOX', 'YOLOF']:
+    if infer_arch in ['PPYOLOE', 'YOLOX', 'YOLOF']:
        infer_cfg['arch'] = infer_arch
        infer_cfg['min_subgraph_size'] = TRT_MIN_SUBGRAPH[infer_arch]
        arch_state = True

--- a/ppdet/modeling/architectures/__init__.py
+++ b/ppdet/modeling/architectures/__init__.py
@@ -16,6 +16,7 @@ from . import meta_arch
 from . import faster_rcnn
 from . import mask_rcnn
 from . import yolo
+from . import ppyoloe
 from . import cascade_rcnn
 from . import ssd
 from . import fcos
@@ -44,6 +45,7 @@ from .meta_arch import *
 from .faster_rcnn import *
 from .mask_rcnn import *
 from .yolo import *
+from .ppyoloe import *
 from .cascade_rcnn import *
 from .ssd import *
 from .fcos import *

--- a/ppdet/modeling/architectures/ppyoloe.py
+++ b/ppdet/modeling/architectures/ppyoloe.py
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+#   
+# Licensed under the Apache License, Version 2.0 (the "License");   
+# you may not use this file except in compliance with the License.  
+# You may obtain a copy of the License at   
+#   
+#     http://www.apache.org/licenses/LICENSE-2.0    
+#   
+# Unless required by applicable law or agreed to in writing, software   
+# distributed under the License is distributed on an "AS IS" BASIS, 
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
+# See the License for the specific language governing permissions and   
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from ppdet.core.workspace import register, create
+from .meta_arch import BaseArch
+__all__ = ['PPYOLOE']
+# This is the recommended architecture for PP-YOLOE and PP-YOLOE+.
+# Both models can also be run with the YOLOv3 architecture in yolo.py.
+@register
+class PPYOLOE(BaseArch):
+    """
+    PP-YOLOE detection architecture.
+    Chains backbone -> neck -> yolo_head; in eval mode the head outputs are
+    turned into boxes either by `post_process` or by the head itself.
+    """
+    __category__ = 'architecture'
+    # `post_process` is not built in `from_config`; presumably the ppdet
+    # workspace injects it from the config because it is listed here
+    # -- TODO confirm against ppdet.core.workspace.
+    __inject__ = ['post_process']
+    def __init__(self,
+                 backbone='CSPResNet',
+                 neck='CustomCSPPAN',
+                 yolo_head='PPYOLOEHead',
+                 post_process='BBoxPostProcess',
+                 for_mot=False):
+        """
+        PPYOLOE network, see https://arxiv.org/abs/2203.16250
+        Args:
+            backbone (nn.Layer): backbone instance
+            neck (nn.Layer): neck instance
+            yolo_head (nn.Layer): detection head instance
+            post_process (object): `BBoxPostProcess` instance, or None to let
+                `yolo_head.post_process` produce the final boxes
+            for_mot (bool): whether return other features for multi-object tracking
+                models, default False in pure object detection models.
+                The flag is forwarded to the neck in `_forward`.
+        """
+        super(PPYOLOE, self).__init__()
+        self.backbone = backbone
+        self.neck = neck
+        self.yolo_head = yolo_head
+        self.post_process = post_process
+        self.for_mot = for_mot
+    @classmethod
+    def from_config(cls, cfg, *args, **kwargs):
+        """
+        Build the sub-modules from `cfg`.
+        Returns:
+            dict with keys 'backbone', 'neck' and 'yolo_head'.
+        """
+        # backbone
+        backbone = create(cfg['backbone'])
+        # neck, sized from the backbone output shape
+        # NOTE: rebinding `kwargs` discards any keyword arguments passed in
+        # by the caller; only `input_shape` is forwarded to `create`.
+        kwargs = {'input_shape': backbone.out_shape}
+        neck = create(cfg['neck'], **kwargs)
+        # head, sized from the neck output shape
+        kwargs = {'input_shape': neck.out_shape}
+        yolo_head = create(cfg['yolo_head'], **kwargs)
+        return {
+            'backbone': backbone,
+            'neck': neck,
+            "yolo_head": yolo_head,
+        }
+    def _forward(self):
+        """
+        Run backbone, neck and head on `self.inputs`.
+        Returns:
+            In training mode, whatever `yolo_head(neck_feats, self.inputs)`
+            returns (the losses). Otherwise a dict with keys 'bbox' and
+            'bbox_num'.
+        """
+        body_feats = self.backbone(self.inputs)
+        # `for_mot` is passed to the neck as its second positional argument
+        neck_feats = self.neck(body_feats, self.for_mot)
+        if self.training:
+            # the head receives the inputs as well and returns the losses
+            yolo_losses = self.yolo_head(neck_feats, self.inputs)
+            return yolo_losses
+        else:
+            yolo_head_outs = self.yolo_head(neck_feats)
+            if self.post_process is not None:
+                # external post-process: needs the head's `mask_anchors`
+                # plus `im_shape` and `scale_factor` from the inputs.
+                # NOTE(review): this branch reads `self.yolo_head.mask_anchors`;
+                # confirm the configured head actually defines it.
+                bbox, bbox_num = self.post_process(
+                    yolo_head_outs, self.yolo_head.mask_anchors,
+                    self.inputs['im_shape'], self.inputs['scale_factor'])
+            else:
+                # no external post-process: the head produces the boxes
+                # itself from its outputs and `scale_factor`
+                bbox, bbox_num = self.yolo_head.post_process(
+                    yolo_head_outs, self.inputs['scale_factor'])
+            output = {'bbox': bbox, 'bbox_num': bbox_num}
+            return output
+    def get_loss(self):
+        """Delegate to `_forward`; its branch is selected by `self.training`."""
+        return self._forward()
+    def get_pred(self):
+        """Delegate to `_forward`; its branch is selected by `self.training`."""
+        return self._forward()
--- a/ppdet/modeling/architectures/yolo.py
+++ b/ppdet/modeling/architectures/yolo.py
@@ -21,6 +21,8 @@ from .meta_arch import BaseArch
 from ..post_process import JDEBBoxPostProcess
 __all__ = ['YOLOv3']
+# YOLOv3, PP-YOLO, PP-YOLOv2, PP-YOLOE and PP-YOLOE+ can all use this YOLOv3 architecture.
+# For PP-YOLOE and PP-YOLOE+, the PPYOLOE architecture in ppyoloe.py is recommended instead.
 @register
@@ -99,6 +101,7 @@ class YOLOv3(BaseArch):
            yolo_head_outs = self.yolo_head(neck_feats)
            if self.for_mot:
+                # the detection part of JDE MOT model
                boxes_idx, bbox, bbox_num, nms_keep_idx = self.post_process(
                    yolo_head_outs, self.yolo_head.mask_anchors)
                output = {
@@ -110,13 +113,16 @@ class YOLOv3(BaseArch):
                }
            else:
                if self.return_idx:
+                    # the detection part of JDE MOT model
                    _, bbox, bbox_num, _ = self.post_process(
                        yolo_head_outs, self.yolo_head.mask_anchors)
                elif self.post_process is not None:
+                    # anchor based YOLOs: YOLOv3,PP-YOLO,PP-YOLOv2 use mask_anchors
                    bbox, bbox_num = self.post_process(
                        yolo_head_outs, self.yolo_head.mask_anchors,
                        self.inputs['im_shape'], self.inputs['scale_factor'])
                else:
+                    # anchor free YOLOs: PP-YOLOE, PP-YOLOE+
                    bbox, bbox_num = self.yolo_head.post_process(
                        yolo_head_outs, self.inputs['scale_factor'])
                output = {'bbox': bbox, 'bbox_num': bbox_num}