yolo3.py 5.0 KB
Newer Older
B
Bubbliiiing 已提交
1 2
from collections import OrderedDict

J
JiaQi Xu 已提交
3 4
import torch
import torch.nn as nn
B
Bubbliiiing 已提交
5

J
JiaQi Xu 已提交
6 7
from nets.darknet import darknet53

B
Bubbliiiing 已提交
8

J
JiaQi Xu 已提交
9 10 11 12 13 14 15 16
def conv2d(filter_in, filter_out, kernel_size):
    """Build a Conv2d -> BatchNorm2d -> LeakyReLU(0.1) unit.

    The convolution uses stride 1, no bias, and a padding of
    ``(kernel_size - 1) // 2`` (0 when ``kernel_size`` is falsy).

    Args:
        filter_in: number of input channels.
        filter_out: number of output channels.
        kernel_size: side length of the square convolution kernel.

    Returns:
        An ``nn.Sequential`` whose children are named ``conv``, ``bn``
        and ``relu``, in that order.
    """
    padding = 0 if not kernel_size else (kernel_size - 1) // 2

    unit = nn.Sequential()
    unit.add_module(
        "conv",
        nn.Conv2d(
            filter_in,
            filter_out,
            kernel_size=kernel_size,
            stride=1,
            padding=padding,
            bias=False,
        ),
    )
    unit.add_module("bn", nn.BatchNorm2d(filter_out))
    unit.add_module("relu", nn.LeakyReLU(0.1))
    return unit

B
Bubbliiiing 已提交
17 18 19 20
#------------------------------------------------------------------------#
#   make_last_layers builds seven convolutions in total: the first five
#   extract features, and the last two produce the YOLO network's
#   prediction output.
#------------------------------------------------------------------------#
J
JiaQi Xu 已提交
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
def make_last_layers(filters_list, in_filters, out_filter):
    """Assemble the seven-layer stack used by one detection head.

    Layers 0-4 alternate 1x1 and 3x3 ``conv2d`` units, layer 5 is a further
    3x3 ``conv2d`` unit, and layer 6 is a plain biased 1x1 ``nn.Conv2d``
    that maps to ``out_filter`` channels.

    Args:
        filters_list: two channel counts; index 0 is the output width of
            the 1x1 units, index 1 the output width of the 3x3 units.
        in_filters: number of channels fed into the first layer.
        out_filter: number of channels produced by the final convolution.

    Returns:
        An ``nn.ModuleList`` holding the seven layers in execution order.
    """
    squeeze, expand = filters_list[0], filters_list[1]

    head = nn.ModuleList()
    head.append(conv2d(in_filters, squeeze, 1))
    # Two (3x3 expand, 1x1 squeeze) pairs complete the first five layers.
    for _ in range(2):
        head.append(conv2d(squeeze, expand, 3))
        head.append(conv2d(expand, squeeze, 1))
    head.append(conv2d(squeeze, expand, 3))
    head.append(
        nn.Conv2d(expand, out_filter, kernel_size=1, stride=1, padding=0, bias=True)
    )
    return head

class YoloBody(nn.Module):
    """YOLOv3 body: a darknet53 backbone followed by three prediction heads.

    The heads run coarse-to-fine. Each finer head receives the previous
    head's intermediate features (reduced by a 1x1 conv and upsampled 2x)
    concatenated with the matching backbone feature map along the channel
    dimension.

    Args:
        config: mapping that provides ``config["yolo"]["anchors"]`` (at
            least three anchor groups, one per head) and
            ``config["yolo"]["classes"]`` (number of classes).
    """

    # Index, inside a stack built by make_last_layers, of the last
    # feature-extraction layer. Its output is what the next head consumes.
    _BRANCH_INDEX = 4

    def __init__(self, config):
        super(YoloBody, self).__init__()
        self.config = config
        #---------------------------------------------------#
        #   Build the darknet53 backbone.
        #   It yields three feature maps; per the original author's
        #   notes their shapes are 52,52,256 / 26,26,512 / 13,13,1024
        #   (spatial sizes depend on the input resolution).
        #---------------------------------------------------#
        self.backbone = darknet53(None)

        # out_filters is expected to be [64, 128, 256, 512, 1024]
        # (original author's note; defined by the backbone).
        out_filters = self.backbone.layers_out_filters

        #------------------------------------------------------------------------#
        #   Output channels of each head: anchors_in_group * (5 + classes).
        #   E.g. 3 anchors and 20 classes (VOC) give 3 * (5 + 20) = 75.
        #------------------------------------------------------------------------#
        final_out_filter0 = len(config["yolo"]["anchors"][0]) * (5 + config["yolo"]["classes"])
        self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], final_out_filter0)

        final_out_filter1 = len(config["yolo"]["anchors"][1]) * (5 + config["yolo"]["classes"])
        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        # Input width = backbone channels + the 256 upsampled channels.
        self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, final_out_filter1)

        final_out_filter2 = len(config["yolo"]["anchors"][2]) * (5 + config["yolo"]["classes"])
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        # Input width = backbone channels + the 128 upsampled channels.
        self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128, final_out_filter2)

    @staticmethod
    def _branch(last_layer, layer_in):
        """Run ``layer_in`` through every layer of ``last_layer``.

        Returns:
            ``(head_output, branch_features)`` where ``branch_features`` is
            the activation captured right after layer ``_BRANCH_INDEX``.
        """
        out_branch = None
        for i, e in enumerate(last_layer):
            layer_in = e(layer_in)
            if i == YoloBody._BRANCH_INDEX:
                out_branch = layer_in
        return layer_in, out_branch

    def forward(self, x):
        """Compute the three head outputs for input batch ``x``.

        Returns:
            ``(out0, out1, out2)``, ordered from the coarsest head to the
            finest head.
        """
        #---------------------------------------------------#
        #   Three backbone feature maps; per the original notes:
        #   52,52,256; 26,26,512; 13,13,1024
        #---------------------------------------------------#
        x2, x1, x0 = self.backbone(x)

        #---------------------------------------------------#
        #   First feature layer
        #   out0 = (batch_size,255,13,13) per the original notes
        #---------------------------------------------------#
        # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
        out0, out0_branch = self._branch(self.last_layer0, x0)

        # 13,13,512 -> 13,13,256 -> 26,26,256
        x1_in = self.last_layer1_conv(out0_branch)
        x1_in = self.last_layer1_upsample(x1_in)

        # 26,26,256 + 26,26,512 -> 26,26,768
        x1_in = torch.cat([x1_in, x1], 1)
        #---------------------------------------------------#
        #   Second feature layer
        #   out1 = (batch_size,255,26,26) per the original notes
        #---------------------------------------------------#
        # 26,26,768 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
        out1, out1_branch = self._branch(self.last_layer1, x1_in)

        # 26,26,256 -> 26,26,128 -> 52,52,128
        x2_in = self.last_layer2_conv(out1_branch)
        x2_in = self.last_layer2_upsample(x2_in)

        # 52,52,128 + 52,52,256 -> 52,52,384
        x2_in = torch.cat([x2_in, x2], 1)
        #---------------------------------------------------#
        #   Third feature layer
        #   out2 = (batch_size,255,52,52) per the original notes
        #---------------------------------------------------#
        # 52,52,384 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
        out2, _ = self._branch(self.last_layer2, x2_in)
        return out0, out1, out2