# yolo.py
import numpy as np
import torch
import torch.nn as nn

from nets.backbone import Backbone, Block, Conv, SiLU, Transition, autopad


class SPPCSPC(nn.Module):
    """Spatial-pyramid-pooling block with a cross-stage-partial shortcut.

    CSP reference: https://github.com/WongKinYiu/CrossStagePartialNetworks

    The input is processed by two branches that are concatenated at the end:

    * pooling branch: ``cv1 -> cv3 -> cv4``, then the result is concatenated
      with one stride-1 max-pooled copy of itself per kernel size in ``k``,
      and reduced again by ``cv5 -> cv6``;
    * shortcut branch: a single ``cv2`` applied to the raw input.

    Args:
        c1: number of input channels.
        c2: number of output channels.
        n, shortcut, g: accepted for signature compatibility; not used here.
        e: expansion ratio; the hidden width is ``int(2 * c2 * e)``.
        k: kernel sizes of the stride-1 max-pool layers.
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
        super().__init__()
        hidden = int(2 * c2 * e)

        # NOTE: submodules are created in the same order as before so that
        # parameter initialisation and state_dict ordering are unchanged.
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(hidden, hidden, 3, 1)
        self.cv4 = Conv(hidden, hidden, 1, 1)

        # padding = kernel // 2 with stride 1, so every pooled map can be
        # concatenated with the un-pooled one along the channel axis.
        pools = []
        for size in k:
            pools.append(nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2))
        self.m = nn.ModuleList(pools)

        self.cv5 = Conv(4 * hidden, hidden, 1, 1)
        self.cv6 = Conv(hidden, hidden, 3, 1)
        self.cv7 = Conv(2 * hidden, c2, 1, 1)

    def forward(self, x):
        """Run both branches on ``x`` and merge them with ``cv7``."""
        # Pooling branch.
        trunk = self.cv1(x)
        trunk = self.cv3(trunk)
        trunk = self.cv4(trunk)

        pyramid = [trunk]
        for pool in self.m:
            pyramid.append(pool(trunk))
        pooled_branch = self.cv6(self.cv5(torch.cat(pyramid, 1)))

        # Shortcut branch.
        shortcut_branch = self.cv2(x)

        merged = torch.cat((pooled_branch, shortcut_branch), dim=1)
        return self.cv7(merged)

class RepConv(nn.Module):
    """Re-parameterizable 3x3 convolution ("represented convolution").

    Reference: https://arxiv.org/abs/2101.03697

    In training form the block sums three parallel branches before the
    activation:

    * ``rbr_dense``    - 3x3 conv + BatchNorm
    * ``rbr_1x1``      - 1x1 conv + BatchNorm
    * ``rbr_identity`` - BatchNorm only (present only if ``c1 == c2`` and
      ``s == 1``, otherwise ``None``)

    In deploy form (``deploy=True`` at construction, or after
    ``fuse_repvgg_block()``) a single biased 3x3 conv ``rbr_reparam`` replaces
    all three branches.

    Args:
        c1: input channels.
        c2: output channels.
        k: kernel size; must be 3.
        s: stride.
        p: padding passed to ``autopad``; the resulting padding must be 1.
        g: convolution groups.
        act: ``True`` selects ``LeakyReLU(0.1)``, an ``nn.Module`` is used
            as-is, anything else disables the activation.
        deploy: build the block directly in its fused form.
    """

    # NOTE: the default ``act=SiLU()`` is evaluated once, when the class is
    # defined, so every RepConv created with the default shares that single
    # module object. Kept as-is for backward compatibility.
    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=SiLU(), deploy=False):
        super(RepConv, self).__init__()
        self.deploy = deploy
        self.groups = g
        self.in_channels = c1
        self.out_channels = c2

        assert k == 3
        assert autopad(k, p) == 1

        # Padding for the 1x1 branch so that its output lines up spatially
        # with the 3x3 branch.
        padding_11 = autopad(k, p) - k // 2
        self.act = nn.LeakyReLU(0.1, inplace=True) if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

        if deploy:
            self.rbr_reparam = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=True)
        else:
            self.rbr_identity = (nn.BatchNorm2d(num_features=c1, eps=0.001, momentum=0.03) if c2 == c1 and s == 1 else None)
            self.rbr_dense = nn.Sequential(
                nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False),
                nn.BatchNorm2d(num_features=c2, eps=0.001, momentum=0.03),
            )
            self.rbr_1x1 = nn.Sequential(
                nn.Conv2d(c1, c2, 1, s, padding_11, groups=g, bias=False),
                nn.BatchNorm2d(num_features=c2, eps=0.001, momentum=0.03),
            )

    def forward(self, inputs):
        """Apply the fused conv if present, otherwise sum the branches."""
        if hasattr(self, "rbr_reparam"):
            return self.act(self.rbr_reparam(inputs))
        id_out = 0 if self.rbr_identity is None else self.rbr_identity(inputs)
        return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)

    def get_equivalent_kernel_bias(self):
        """Return the (3x3 kernel, bias) equivalent to the sum of all branches."""
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
        kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
        return (
            kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid,
            bias3x3 + bias1x1 + biasid,
        )

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        """Zero-pad a 1x1 kernel to 3x3 (``None`` maps to the scalar 0)."""
        if kernel1x1 is None:
            return 0
        return nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    @staticmethod
    def _bn_scale_shift(bn):
        """Return BatchNorm's inference-time per-channel (scale, shift).

        ``scale`` is shaped ``(C, 1, 1, 1)`` so it broadcasts over a conv
        weight; ``shift`` is shaped ``(C,)``.
        """
        std = (bn.running_var + bn.eps).sqrt()
        scale = (bn.weight / std).reshape(-1, 1, 1, 1)
        shift = bn.bias - bn.running_mean * bn.weight / std
        return scale, shift

    def _identity_kernel_3x3(self, device):
        """Build the 3x3 kernel of a grouped conv that copies its input.

        With ``groups`` groups, output channel ``i`` only sees
        ``in_channels // groups`` inputs, and the one equal to channel ``i``
        sits at local index ``i % (in_channels // groups)``.
        """
        input_dim = self.in_channels // self.groups
        kernel = torch.zeros(
            (self.in_channels, input_dim, 3, 3), dtype=torch.float32, device=device
        )
        out_idx = torch.arange(self.in_channels, device=device)
        kernel[out_idx, out_idx % input_dim, 1, 1] = 1
        return kernel

    def _fuse_bn_tensor(self, branch):
        """Fold a branch's BatchNorm into an equivalent (kernel, bias) pair.

        ``branch`` may be ``None`` (returns ``(0, 0)``), a ``Sequential`` of
        ``(conv, bn)``, or a bare BatchNorm layer (the identity branch).
        """
        if branch is None:
            return 0, 0
        if isinstance(branch, nn.Sequential):
            kernel = branch[0].weight
            bn = branch[1]
        else:
            # SyncBatchNorm is accepted too, matching fuse_repvgg_block().
            assert isinstance(branch, (nn.BatchNorm2d, nn.SyncBatchNorm))
            cached = getattr(self, "id_tensor", None)
            # Rebuild the cached identity kernel if the module has been moved
            # to another device since it was first created.
            if cached is None or cached.device != branch.weight.device:
                self.id_tensor = self._identity_kernel_3x3(branch.weight.device)
            kernel = self.id_tensor
            bn = branch
        scale, shift = self._bn_scale_shift(bn)
        return kernel * scale, shift

    def repvgg_convert(self):
        """Return the equivalent kernel and bias as detached NumPy arrays."""
        kernel, bias = self.get_equivalent_kernel_bias()
        return (
            kernel.detach().cpu().numpy(),
            bias.detach().cpu().numpy(),
        )

    def fuse_conv_bn(self, conv, bn):
        """Return a new biased Conv2d equivalent to ``bn(conv(x))``.

        The new layer copies the geometry of ``conv`` (stride, padding,
        dilation, groups, padding mode). If ``conv`` already has a bias it is
        folded into the fused bias as well.
        """
        scale, bias = self._bn_scale_shift(bn)
        if conv.bias is not None:
            # bn(conv(x) + b) adds b * gamma / std to the fused bias.
            bias = bias + conv.bias * scale.reshape(-1)
        weights = conv.weight * scale

        fused = nn.Conv2d(in_channels=conv.in_channels,
                          out_channels=conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          dilation=conv.dilation,
                          groups=conv.groups,
                          bias=True,
                          padding_mode=conv.padding_mode)

        fused.weight = torch.nn.Parameter(weights)
        fused.bias = torch.nn.Parameter(bias)
        return fused

    def fuse_repvgg_block(self):
        """Collapse the three branches into the single conv ``rbr_reparam``.

        Does nothing if the block is already in deploy form. Afterwards
        ``rbr_dense``, ``rbr_1x1`` and ``rbr_identity`` are set to ``None``
        and ``deploy`` is ``True``.
        """
        if self.deploy:
            return
        print("RepConv.fuse_repvgg_block")
        self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1])

        self.rbr_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1])
        rbr_1x1_bias = self.rbr_1x1.bias
        weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, [1, 1, 1, 1])

        # Fuse self.rbr_identity.
        if isinstance(self.rbr_identity, (nn.BatchNorm2d, nn.SyncBatchNorm)):
            # Use the grouped identity pattern (same as _fuse_bn_tensor): a
            # plain fill_diagonal_ on the (C, C // groups) weight only marks
            # the first C // groups rows when groups > 1.
            identity_kernel = self._identity_kernel_3x3(self.rbr_1x1.weight.device)
            scale, bias_identity_expanded = self._bn_scale_shift(self.rbr_identity)
            weight_identity_expanded = identity_kernel * scale
        else:
            bias_identity_expanded = torch.zeros_like(rbr_1x1_bias)
            weight_identity_expanded = torch.zeros_like(weight_1x1_expanded)

        self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded)
        self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded)

        self.rbr_reparam = self.rbr_dense
        self.deploy = True

        # Unregister the training-time branches but keep the attribute names
        # (as None) so later attribute access does not raise.
        if self.rbr_identity is not None:
            del self.rbr_identity
            self.rbr_identity = None

        if self.rbr_1x1 is not None:
            del self.rbr_1x1
            self.rbr_1x1 = None

        if self.rbr_dense is not None:
            del self.rbr_dense
            self.rbr_dense = None
            
def fuse_conv_and_bn(conv, bn):
    """Fold a BatchNorm layer into the preceding convolution.

    Builds a new, frozen (``requires_grad=False``) ``nn.Conv2d`` with a bias
    whose output equals ``bn(conv(x))`` when ``bn`` uses its running
    statistics.

    Args:
        conv: the ``nn.Conv2d`` feeding ``bn`` (with or without a bias).
        bn: the BatchNorm layer applied to the output of ``conv``.

    Returns:
        The fused ``nn.Conv2d``, placed on the same device as ``conv.weight``.
    """
    # Everything below is a pure parameter rewrite; no_grad keeps the fused
    # parameters free of autograd history even if the caller did not disable
    # gradients.
    with torch.no_grad():
        fusedconv = nn.Conv2d(conv.in_channels,
                              conv.out_channels,
                              kernel_size=conv.kernel_size,
                              stride=conv.stride,
                              padding=conv.padding,
                              # dilation / padding_mode are part of the conv's
                              # geometry and must be carried over, as
                              # RepConv.fuse_conv_bn does.
                              dilation=conv.dilation,
                              groups=conv.groups,
                              bias=True,
                              padding_mode=conv.padding_mode).requires_grad_(False).to(conv.weight.device)

        # Per-output-channel BN scale: gamma / sqrt(var + eps).
        scale = bn.weight.div(torch.sqrt(bn.eps + bn.running_var))
        fusedconv.weight.copy_(conv.weight * scale.reshape(-1, 1, 1, 1))

        if conv.bias is None:
            b_conv = torch.zeros(conv.weight.size(0), dtype=conv.weight.dtype, device=conv.weight.device)
        else:
            b_conv = conv.bias
        # bn(conv(x) + b) = scale * conv(x) + scale * (b - mean) + beta
        fusedconv.bias.copy_((b_conv - bn.running_mean) * scale + bn.bias)
    return fusedconv

#---------------------------------------------------#
#   yolo_body
#---------------------------------------------------#
class YoloBody(nn.Module):
    """YOLOv7 detector: backbone, SPPCSPC, top-down / bottom-up neck, heads.

    Args:
        anchors_mask: per-scale anchor index groups; ``len(anchors_mask[i])``
            gives the number of anchors predicted by head ``i``
            (index 0 -> P5 head, 1 -> P4 head, 2 -> P3 head).
        num_classes: number of object classes.
        phi: model variant, ``'l'`` or ``'x'`` (any other key raises
            ``KeyError``).
        pretrained: forwarded to ``Backbone``.
    """

    def __init__(self, anchors_mask, num_classes, phi, pretrained=False):
        super().__init__()
        #-----------------------------------------------#
        #   Hyper-parameters of the supported YOLOv7 variants
        #-----------------------------------------------#
        variants = {
            'l': {
                'transition_channels': 32,
                'panet_channels': 32,
                'e': 2,
                'n': 4,
                'ids': [-1, -2, -3, -4, -5, -6],
                'conv': RepConv,
            },
            'x': {
                'transition_channels': 40,
                'panet_channels': 64,
                'e': 1,
                'n': 6,
                'ids': [-1, -3, -5, -7, -8],
                'conv': Conv,
            },
        }
        cfg = variants[phi]
        tc = cfg['transition_channels']
        pc = cfg['panet_channels']
        block_channels = 32
        n = cfg['n']
        head_conv = cfg['conv']
        # Keyword arguments shared by every neck Block.
        block_kwargs = {'e': cfg['e'], 'n': n, 'ids': cfg['ids']}

        #---------------------------------------------------#
        #   Build the backbone. It yields three feature maps.
        #   Original author's note, for a 640x640x3 input:
        #       80, 80, 512
        #       40, 40, 1024
        #       20, 20, 1024
        #---------------------------------------------------#
        self.backbone = Backbone(tc, block_channels, n, phi, pretrained=pretrained)

        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

        # Top-down path: P5 -> P4.
        self.sppcspc = SPPCSPC(tc * 32, tc * 16)
        self.conv_for_P5 = Conv(tc * 16, tc * 8)
        self.conv_for_feat2 = Conv(tc * 32, tc * 8)
        self.conv3_for_upsample1 = Block(tc * 16, pc * 4, tc * 8, **block_kwargs)

        # Top-down path: P4 -> P3.
        self.conv_for_P4 = Conv(tc * 8, tc * 4)
        self.conv_for_feat1 = Conv(tc * 16, tc * 4)
        self.conv3_for_upsample2 = Block(tc * 8, pc * 2, tc * 4, **block_kwargs)

        # Bottom-up path: P3 -> P4.
        self.down_sample1 = Transition(tc * 4, tc * 4)
        self.conv3_for_downsample1 = Block(tc * 16, pc * 4, tc * 8, **block_kwargs)

        # Bottom-up path: P4 -> P5.
        self.down_sample2 = Transition(tc * 8, tc * 8)
        self.conv3_for_downsample2 = Block(tc * 32, pc * 8, tc * 16, **block_kwargs)

        # Pre-head 3x3 convs (RepConv for 'l', plain Conv for 'x').
        self.rep_conv_1 = head_conv(tc * 4, tc * 8, 3, 1)
        self.rep_conv_2 = head_conv(tc * 8, tc * 16, 3, 1)
        self.rep_conv_3 = head_conv(tc * 16, tc * 32, 3, 1)

        # Each anchor predicts 5 values plus one score per class.
        per_anchor = 5 + num_classes
        self.yolo_head_P3 = nn.Conv2d(tc * 8, len(anchors_mask[2]) * per_anchor, 1)
        self.yolo_head_P4 = nn.Conv2d(tc * 16, len(anchors_mask[1]) * per_anchor, 1)
        self.yolo_head_P5 = nn.Conv2d(tc * 32, len(anchors_mask[0]) * per_anchor, 1)

    def fuse(self):
        """Fuse RepConv blocks and Conv+BN pairs in place; return ``self``."""
        print('Fusing layers... ')
        for layer in self.modules():
            if isinstance(layer, RepConv):
                layer.fuse_repvgg_block()
                continue
            # Exact type match on purpose: subclasses of Conv are left alone.
            if type(layer) is Conv and hasattr(layer, 'bn'):
                layer.conv = fuse_conv_and_bn(layer.conv, layer.bn)
                del layer.bn
                # Switch to the forward variant that skips the removed BN.
                layer.forward = layer.fuseforward
        return self

    def forward(self, x):
        """Return ``[out0, out1, out2]``: the P5, P4 and P3 head outputs."""
        # Backbone features, from shallowest (feat1) to deepest (feat3).
        feat1, feat2, feat3 = self.backbone.forward(x)

        # ---- top-down path ----
        p5_spp = self.sppcspc(feat3)
        p5_up = self.upsample(self.conv_for_P5(p5_spp))
        p4_td = self.conv3_for_upsample1(
            torch.cat([self.conv_for_feat2(feat2), p5_up], 1)
        )

        p4_up = self.upsample(self.conv_for_P4(p4_td))
        p3 = self.conv3_for_upsample2(
            torch.cat([self.conv_for_feat1(feat1), p4_up], 1)
        )

        # ---- bottom-up path ----
        p3_down = self.down_sample1(p3)
        p4 = self.conv3_for_downsample1(torch.cat([p3_down, p4_td], 1))

        p4_down = self.down_sample2(p4)
        p5 = self.conv3_for_downsample2(torch.cat([p4_down, p5_spp], 1))

        p3 = self.rep_conv_1(p3)
        p4 = self.rep_conv_2(p4)
        p5 = self.rep_conv_3(p5)

        #---------------------------------------------------#
        #   Third feature layer
        #   Original note: y3=(batch_size, 75, 80, 80)
        #   (75 = 3 * (5 + 20), i.e. 3 anchors and 20 classes)
        #---------------------------------------------------#
        out2 = self.yolo_head_P3(p3)
        #---------------------------------------------------#
        #   Second feature layer
        #   Original note: y2=(batch_size, 75, 40, 40)
        #---------------------------------------------------#
        out1 = self.yolo_head_P4(p4)
        #---------------------------------------------------#
        #   First feature layer
        #   Original note: y1=(batch_size, 75, 20, 20)
        #---------------------------------------------------#
        out0 = self.yolo_head_P5(p5)

        return [out0, out1, out2]