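Add yolov7_x support: select the model variant via `phi` ('l' or 'x'); rename CSPDarknet/RCSPDark_Block/RCSPDark_Transition to Backbone/Block/Transition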

cdc66baa · Bubbliiiing · 0b930c81 · cdc66baa · cdc66baa · cdc66baa
9 changed files
--- a/nets/CSPdarknet.py
+++ b/nets/CSPdarknet.py
@@ -25,10 +25,10 @@ class Conv(nn.Module):
    def fuseforward(self, x):
        return self.act(self.conv(x))
    
-class RCSPDark_Block(nn.Module):
-    def __init__(self, c1, c2, c3, n=4, e=0.5, ids=[0]):
-        super(RCSPDark_Block, self).__init__()
-        c_ = int(c1 * e)
+class Block(nn.Module):
+    def __init__(self, c1, c2, c3, n=4, e=1, ids=[0]):
+        super(Block, self).__init__()
+        c_ = int(c2 * e)
        
        self.ids = ids
        self.cv1 = Conv(c1, c_, 1, 1)
@@ -58,9 +58,9 @@ class MP(nn.Module):
    def forward(self, x):
        return self.m(x)
    
-class RCSPDark_Transition(nn.Module):
+class Transition(nn.Module):
    def __init__(self, c1, c2):
-        super(RCSPDark_Transition, self).__init__()
+        super(Transition, self).__init__()
        self.cv1 = Conv(c1, c2, 1, 1)
        self.cv2 = Conv(c1, c2, 1, 1)
        self.cv3 = Conv(c2, c2, 3, 2)
@@ -76,40 +76,42 @@ class RCSPDark_Transition(nn.Module):
        
        return torch.cat([x_2, x_1], 1)
    
-class CSPDarknet(nn.Module):
-    def __init__(self, base_channels, pretrained=False):
+class Backbone(nn.Module):
+    def __init__(self, transition_channels, block_channels, n, phi, pretrained=False):
        super().__init__()
        #-----------------------------------------------#
        #   输入图片是640, 640, 3
-        #   初始的基本通道是64
        #-----------------------------------------------#
-        
+        ids = {
+            'l' : [-1, -3, -5, -6],
+            'x' : [-1, -3, -5, -7, -8], 
+        }[phi]
        self.stem = nn.Sequential(
-            Conv(3, base_channels, 3, 1),
-            Conv(base_channels, base_channels * 2, 3, 2),
-            Conv(base_channels * 2, base_channels * 2, 3, 1),
+            Conv(3, transition_channels, 3, 1),
+            Conv(transition_channels, transition_channels * 2, 3, 2),
+            Conv(transition_channels * 2, transition_channels * 2, 3, 1),
        )
        self.dark2 = nn.Sequential(
-            Conv(base_channels * 2, base_channels * 4, 3, 2),
-            RCSPDark_Block(base_channels * 4, base_channels * 2, base_channels * 8, ids=[-1, -3, -5, -6]),
+            Conv(transition_channels * 2, transition_channels * 4, 3, 2),
+            Block(transition_channels * 4, block_channels * 2, transition_channels * 8, n=n, ids=ids),
        )
        self.dark3 = nn.Sequential(
-            RCSPDark_Transition(base_channels * 8, base_channels * 4),
-            RCSPDark_Block(base_channels * 8, base_channels * 4, base_channels * 16, ids=[-1, -3, -5, -6]),
+            Transition(transition_channels * 8, transition_channels * 4),
+            Block(transition_channels * 8, block_channels * 4, transition_channels * 16, n=n, ids=ids),
        )
        self.dark4 = nn.Sequential(
-            RCSPDark_Transition(base_channels * 16, base_channels * 8),
-            RCSPDark_Block(base_channels * 16, base_channels * 8, base_channels * 32, ids=[-1, -3, -5, -6]),
+            Transition(transition_channels * 16, transition_channels * 8),
+            Block(transition_channels * 16, block_channels * 8, transition_channels * 32, n=n, ids=ids),
        )
        self.dark5 = nn.Sequential(
-            RCSPDark_Transition(base_channels * 32, base_channels * 16),
-            RCSPDark_Block(base_channels * 32, base_channels * 8, base_channels * 32, e=1/4, ids=[-1, -3, -5, -6]),
+            Transition(transition_channels * 32, transition_channels * 16),
+            Block(transition_channels * 32, block_channels * 8, transition_channels * 32, n=n, ids=ids),
        )
        
        if pretrained:
-            phi = 'l'
            url = {
-                "l" : 'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/cspdarknet_backbone.pth',
+                "l" : 'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_backbone.pth',
+                "x" : 'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_x_backbone.pth',
            }[phi]
            checkpoint = torch.hub.load_state_dict_from_url(url=url, map_location="cpu", model_dir="./model_data")
            self.load_state_dict(checkpoint, strict=False)

--- a/nets/yolo.py
+++ b/nets/yolo.py
@@ -2,8 +2,7 @@ import numpy as np
 import torch
 import torch.nn as nn

-from nets.CSPdarknet import (Conv, CSPDarknet, RCSPDark_Block,
-                             RCSPDark_Transition, SiLU, autopad)
+from nets.backbone import Backbone, Block, Conv, SiLU, Transition, autopad


 class SPPCSPC(nn.Module):
@@ -65,9 +64,9 @@ class RepConv(nn.Module):
        return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)
    
    def get_equivalent_kernel_bias(self):
-        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
-        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
-        kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
+        kernel3x3, bias3x3  = self._fuse_bn_tensor(self.rbr_dense)
+        kernel1x1, bias1x1  = self._fuse_bn_tensor(self.rbr_1x1)
+        kernelid, biasid    = self._fuse_bn_tensor(self.rbr_identity)
        return (
            kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid,
            bias3x3 + bias1x1 + biasid,
@@ -83,12 +82,12 @@ class RepConv(nn.Module):
        if branch is None:
            return 0, 0
        if isinstance(branch, nn.Sequential):
-            kernel = branch[0].weight
+            kernel      = branch[0].weight
            running_mean = branch[1].running_mean
            running_var = branch[1].running_var
-            gamma = branch[1].weight
-            beta = branch[1].bias
-            eps = branch[1].eps
+            gamma       = branch[1].weight
+            beta        = branch[1].bias
+            eps         = branch[1].eps
        else:
            assert isinstance(branch, nn.BatchNorm2d)
            if not hasattr(self, "id_tensor"):
@@ -99,14 +98,14 @@ class RepConv(nn.Module):
                for i in range(self.in_channels):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
-            kernel = self.id_tensor
+            kernel      = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
-            gamma = branch.weight
-            beta = branch.bias
-            eps = branch.eps
+            gamma       = branch.weight
+            beta        = branch.bias
+            eps         = branch.eps
        std = (running_var + eps).sqrt()
-        t = (gamma / std).reshape(-1, 1, 1, 1)
+        t   = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def repvgg_convert(self):
@@ -164,18 +163,18 @@ class RepConv(nn.Module):
            identity_conv_1x1.weight.data.fill_diagonal_(1.0)
            identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze(2).unsqueeze(3)

-            identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity)
-            bias_identity_expanded = identity_conv_1x1.bias
-            weight_identity_expanded = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1])            
+            identity_conv_1x1           = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity)
+            bias_identity_expanded      = identity_conv_1x1.bias
+            weight_identity_expanded    = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1])            
        else:
-            bias_identity_expanded = torch.nn.Parameter( torch.zeros_like(rbr_1x1_bias) )
-            weight_identity_expanded = torch.nn.Parameter( torch.zeros_like(weight_1x1_expanded) )            
+            bias_identity_expanded      = torch.nn.Parameter( torch.zeros_like(rbr_1x1_bias) )
+            weight_identity_expanded    = torch.nn.Parameter( torch.zeros_like(weight_1x1_expanded) )            
        
-        self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded)
-        self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded)
+        self.rbr_dense.weight   = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded)
+        self.rbr_dense.bias     = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded)
                
-        self.rbr_reparam = self.rbr_dense
-        self.deploy = True
+        self.rbr_reparam    = self.rbr_dense
+        self.deploy         = True

        if self.rbr_identity is not None:
            del self.rbr_identity
@@ -211,47 +210,55 @@ def fuse_conv_and_bn(conv, bn):
 #   yolo_body
 #---------------------------------------------------#
 class YoloBody(nn.Module):
-    def __init__(self, anchors_mask, num_classes, pretrained=False):
+    def __init__(self, anchors_mask, num_classes, phi, pretrained=False):
        super(YoloBody, self).__init__()
-        base_channels   = 32
+        #-----------------------------------------------#
+        #   定义了不同yolov7版本的参数
+        #-----------------------------------------------#
+        transition_channels = {'l' : 32, 'x' : 40}[phi]
+        block_channels      = 32
+        panet_channels      = {'l' : 32, 'x' : 64}[phi]
+        e       = {'l' : 2, 'x' : 1}[phi]
+        n       = {'l' : 4, 'x' : 6}[phi]
+        ids     = {'l' : [-1, -2, -3, -4, -5, -6], 'x' : [-1, -3, -5, -7, -8]}[phi]
+        conv    = {'l' : RepConv, 'x' : Conv}[phi]
        #-----------------------------------------------#
        #   输入图片是640, 640, 3
-        #   初始的基本通道是64
        #-----------------------------------------------#

        #---------------------------------------------------#   
        #   生成主干模型
        #   获得三个有效特征层，他们的shape分别是：
-        #   80,80,512
-        #   40,40,1024
-        #   20,20,1024
+        #   80, 80, 512
+        #   40, 40, 1024
+        #   20, 20, 1024
        #---------------------------------------------------#
-        self.backbone   = CSPDarknet(base_channels, pretrained=pretrained)
+        self.backbone   = Backbone(transition_channels, block_channels, n, phi, pretrained=pretrained)

        self.upsample   = nn.Upsample(scale_factor=2, mode="nearest")

-        self.sppcspc                = SPPCSPC(base_channels * 32, base_channels * 16)
-        self.conv_for_P5            = Conv(base_channels * 16, base_channels * 8)
-        self.conv_for_feat2         = Conv(base_channels * 32, base_channels * 8)
-        self.conv3_for_upsample1    = RCSPDark_Block(base_channels * 16, base_channels * 4, base_channels * 8, ids=[-1, -2, -3, -4, -5, -6])
+        self.sppcspc                = SPPCSPC(transition_channels * 32, transition_channels * 16)
+        self.conv_for_P5            = Conv(transition_channels * 16, transition_channels * 8)
+        self.conv_for_feat2         = Conv(transition_channels * 32, transition_channels * 8)
+        self.conv3_for_upsample1    = Block(transition_channels * 16, panet_channels * 4, transition_channels * 8, e=e, n=n, ids=ids)

-        self.conv_for_P4            = Conv(base_channels * 8, base_channels * 4)
-        self.conv_for_feat1         = Conv(base_channels * 16, base_channels * 4)
-        self.conv3_for_upsample2    = RCSPDark_Block(base_channels * 8, base_channels * 2, base_channels * 4, ids=[-1, -2, -3, -4, -5, -6])
+        self.conv_for_P4            = Conv(transition_channels * 8, transition_channels * 4)
+        self.conv_for_feat1         = Conv(transition_channels * 16, transition_channels * 4)
+        self.conv3_for_upsample2    = Block(transition_channels * 8, panet_channels * 2, transition_channels * 4, e=e, n=n, ids=ids)

-        self.down_sample1           = RCSPDark_Transition(base_channels * 4, base_channels * 4)
-        self.conv3_for_downsample1  = RCSPDark_Block(base_channels * 16, base_channels * 4, base_channels * 8, ids=[-1, -2, -3, -4, -5, -6])
+        self.down_sample1           = Transition(transition_channels * 4, transition_channels * 4)
+        self.conv3_for_downsample1  = Block(transition_channels * 16, panet_channels * 4, transition_channels * 8, e=e, n=n, ids=ids)

-        self.down_sample2           = RCSPDark_Transition(base_channels * 8, base_channels * 8)
-        self.conv3_for_downsample2  = RCSPDark_Block(base_channels * 32, base_channels * 8, base_channels * 16, ids=[-1, -2, -3, -4, -5, -6])
+        self.down_sample2           = Transition(transition_channels * 8, transition_channels * 8)
+        self.conv3_for_downsample2  = Block(transition_channels * 32, panet_channels * 8, transition_channels * 16, e=e, n=n, ids=ids)

-        self.rep_conv_1 = RepConv(base_channels * 4, base_channels * 8, 3, 1)
-        self.rep_conv_2 = RepConv(base_channels * 8, base_channels * 16, 3, 1)
-        self.rep_conv_3 = RepConv(base_channels * 16, base_channels * 32, 3, 1)
+        self.rep_conv_1 = conv(transition_channels * 4, transition_channels * 8, 3, 1)
+        self.rep_conv_2 = conv(transition_channels * 8, transition_channels * 16, 3, 1)
+        self.rep_conv_3 = conv(transition_channels * 16, transition_channels * 32, 3, 1)

-        self.yolo_head_P3 = nn.Conv2d(base_channels * 8, len(anchors_mask[2]) * (5 + num_classes), 1)
-        self.yolo_head_P4 = nn.Conv2d(base_channels * 16, len(anchors_mask[1]) * (5 + num_classes), 1)
-        self.yolo_head_P5 = nn.Conv2d(base_channels * 32, len(anchors_mask[0]) * (5 + num_classes), 1)
+        self.yolo_head_P3 = nn.Conv2d(transition_channels * 8, len(anchors_mask[2]) * (5 + num_classes), 1)
+        self.yolo_head_P4 = nn.Conv2d(transition_channels * 16, len(anchors_mask[1]) * (5 + num_classes), 1)
+        self.yolo_head_P5 = nn.Conv2d(transition_channels * 32, len(anchors_mask[0]) * (5 + num_classes), 1)

    def fuse(self):
        print('Fusing layers... ')
@@ -306,4 +313,4 @@ class YoloBody(nn.Module):
        #---------------------------------------------------#
        out0 = self.yolo_head_P5(P5)

-        return [out0, out1, out2]
\ No newline at end of file
+        return [out0, out1, out2]
--- a/nets/yolo_training.py
+++ b/nets/yolo_training.py
--- a/summary.py
+++ b/summary.py
@@ -11,9 +11,10 @@ if __name__ == "__main__":
    input_shape     = [640, 640]
    anchors_mask    = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    num_classes     = 80
+    phi             = 'l'  # model variant key: 'l' -> yolov7, 'x' -> yolov7_x (YoloBody indexes its per-variant dicts by this key; any other value raises KeyError)
    
    device  = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    m       = YoloBody(anchors_mask, num_classes, False).to(device)
+    m       = YoloBody(anchors_mask, num_classes, phi, False).to(device)
    summary(m, (3, input_shape[0], input_shape[1]))
    
    dummy_input     = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device)

--- a/train.py
+++ b/train.py
@@ -98,6 +98,12 @@ if __name__ == "__main__":
    #   input_shape     输入的shape大小，一定要是32的倍数
    #------------------------------------------------------#
    input_shape     = [640, 640]
+    #------------------------------------------------------#
+    #   phi             所使用到的yolov7的版本，本仓库一共提供两个：
+    #                   l : 对应yolov7
+    #                   x : 对应yolov7_x
+    #------------------------------------------------------#
+    phi             = 'l'
    #----------------------------------------------------------------------------------------------------------------------------#
    #   pretrained      是否使用主干网络的预训练权重，此处使用的是主干的权重，因此是在模型构建的时候进行加载的。
    #                   如果设置了model_path，则主干的权值无需加载，pretrained的值无意义。
@@ -268,15 +274,15 @@ if __name__ == "__main__":
    if pretrained:
        if distributed:
            if local_rank == 0:
-                download_weights()  
+                download_weights(phi)  
            dist.barrier()
        else:
-            download_weights()
+            download_weights(phi)
            
    #------------------------------------------------------#
    #   创建yolo模型
    #------------------------------------------------------#
-    model = YoloBody(anchors_mask, num_classes, pretrained=pretrained)
+    model = YoloBody(anchors_mask, num_classes, phi, pretrained=pretrained)
    if not pretrained:
        weights_init(model)
    if model_path != '':

--- a/utils/dataloader.py
+++ b/utils/dataloader.py
@@ -57,7 +57,10 @@ class YoloDataset(Dataset):
        image       = np.transpose(preprocess_input(np.array(image, dtype=np.float32)), (2, 0, 1))
        box         = np.array(box, dtype=np.float32)
        
-        nL          = len(box)  # number of labels
+        #---------------------------------------------------#
+        #   对真实框进行预处理
+        #---------------------------------------------------#
+        nL          = len(box)
        labels_out  = np.zeros((nL, 6))
        if nL:
            #---------------------------------------------------#
@@ -73,6 +76,10 @@ class YoloDataset(Dataset):
            box[:, 2:4] = box[:, 2:4] - box[:, 0:2]
            box[:, 0:2] = box[:, 0:2] + box[:, 2:4] / 2
            
+            #---------------------------------------------------#
+            #   调整顺序，符合训练的格式
+            #   labels_out中序号为0的部分在collect时处理
+            #---------------------------------------------------#
            labels_out[:, 1] = box[:, -1]
            labels_out[:, 2:] = box[:, :4]
            

--- a/utils/utils.py
+++ b/utils/utils.py
@@ -71,13 +71,13 @@ def show_config(**kwargs):
        print('|%25s | %40s|' % (str(key), str(value)))
    print('-' * 70)
        
-def download_weights(model_dir="./model_data"):
+def download_weights(phi, model_dir="./model_data"):
    import os
    from torch.hub import load_state_dict_from_url
    
-    phi = "l"
    download_urls = {
-        "l" : 'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/cspdarknet_backbone.pth',
+        "l" : 'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_backbone.pth',
+        "x" : 'https://github.com/bubbliiiing/yolov7-pytorch/releases/download/v1.0/yolov7_x_backbone.pth',
    }
    url = download_urls[phi]
    

--- a/utils/utils_bbox.py
+++ b/utils/utils_bbox.py
@@ -11,9 +11,9 @@ class DecodeBox():
        self.bbox_attrs     = 5 + num_classes
        self.input_shape    = input_shape
        #-----------------------------------------------------------#
-        #   20x20的特征层对应的anchor是[116,90],[156,198],[373,326]
-        #   40x40的特征层对应的anchor是[30,61],[62,45],[59,119]
-        #   80x80的特征层对应的anchor是[10,13],[16,30],[33,23]
+        #   anchors for the 20x20 feature layer: [142, 110],[192, 243],[459, 401]
+        #   anchors for the 40x40 feature layer: [36, 75],[76, 55],[72, 146]
+        #   anchors for the 80x80 feature layer: [12, 16],[19, 36],[40, 28]
        #-----------------------------------------------------------#
        self.anchors_mask   = anchors_mask


--- a/yolo.py
+++ b/yolo.py
@@ -37,6 +37,12 @@ class YOLO(object):
        #   输入图片的大小，必须为32的倍数。
        #---------------------------------------------------------------------#
        "input_shape"       : [640, 640],
+        #------------------------------------------------------#
+        #   所使用到的yolov7的版本，本仓库一共提供两个：
+        #   l : 对应yolov7
+        #   x : 对应yolov7_x
+        #------------------------------------------------------#
+        "phi"               : 'l',
        #---------------------------------------------------------------------#
        #   只有得分大于置信度的预测框会被保留下来
        #---------------------------------------------------------------------#
@@ -97,7 +103,7 @@ class YOLO(object):
        #---------------------------------------------------#
        #   建立yolo模型，载入yolo模型的权重
        #---------------------------------------------------#
-        self.net    = YoloBody(self.anchors_mask, self.num_classes)
+        self.net    = YoloBody(self.anchors_mask, self.num_classes, self.phi)
        device      = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.net.load_state_dict(torch.load(self.model_path, map_location=device))
        self.net    = self.net.fuse().eval()