yolov3.py 7.4 KB
Newer Older
D
dengkaipeng 已提交
1
#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
D
dengkaipeng 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay

from config import cfg

from .darknet import add_DarkNet53_conv_body
from .darknet import conv_bn_layer

u010070587's avatar
u010070587 已提交
29

D
dengkaipeng 已提交
30
def yolo_detection_block(input, channel, is_test=True, name=None):
    """Stack the conv/bn layers of one YOLO detection block.

    The block is two (1x1 conv -> 3x3 conv) pairs, followed by a 1x1
    "route" conv and a 3x3 "tip" conv applied to the route.  1x1 layers
    use ``channel`` filters, 3x3 layers use ``channel * 2``.

    Args:
        input: feature map fed to the first layer.
        channel: filter count of the 1x1 layers; must be even.
        is_test: forwarded to every ``conv_bn_layer`` call.
        name: prefix used to build each layer's name.

    Returns:
        A ``(route, tip)`` tuple: the output of the last 1x1 layer and
        the output of the 3x3 layer applied on top of it.

    Raises:
        AssertionError: if ``channel`` is odd.
    """
    assert channel % 2 == 0, \
        "channel {} cannot be divided by 2".format(channel)

    def _conv(feat, ch_out, kernel, layer_name):
        # kernel // 2 gives padding 0 for 1x1 and padding 1 for 3x3.
        return conv_bn_layer(
            feat,
            ch_out,
            filter_size=kernel,
            stride=1,
            padding=kernel // 2,
            is_test=is_test,
            name=layer_name)

    wide = channel * 2
    feat = input
    for idx in range(2):
        feat = _conv(feat, channel, 1, '{}.{}.0'.format(name, idx))
        feat = _conv(feat, wide, 3, '{}.{}.1'.format(name, idx))

    route = _conv(feat, channel, 1, '{}.2'.format(name))
    tip = _conv(route, wide, 3, '{}.tip'.format(name))
    return route, tip

u010070587's avatar
u010070587 已提交
69 70

def upsample(input, scale=2, name=None):
    """Resize ``input`` with ``fluid.layers.resize_nearest``.

    Args:
        input: feature map to resize.
        scale: resize factor; converted to ``float`` before the call.
        name: optional name forwarded to the layer.

    Returns:
        The output of ``fluid.layers.resize_nearest``.
    """
    factor = float(scale)
    return fluid.layers.resize_nearest(input=input, scale=factor, name=name)

u010070587's avatar
u010070587 已提交
75

D
dengkaipeng 已提交
76
class YOLOv3(object):
    """YOLOv3 detection network assembled from fluid layers.

    Call ``build_model()`` first, then ``loss()`` in training mode or
    ``get_pred()`` in inference mode.  Hyper-parameters (input size,
    anchors, class count, thresholds, ...) are read from the global
    ``cfg`` object.
    """

    def __init__(self, is_train=True, use_random=True):
        """Record the mode flags and create empty result containers.

        Args:
            is_train: when true ``build_model`` creates a py_reader and the
                per-output losses; otherwise it creates feed variables and
                the per-output boxes/scores.
            use_random: stored on the instance; not read inside this class.
        """
        self.is_train = is_train
        self.use_random = use_random
        self.outputs = []
        self.losses = []
        self.boxes = []
        self.scores = []
        # Downsample ratio used for the first output; every following
        # output uses half the ratio of the previous one.
        self.downsample = 32

    def build_input(self):
        """Create the input variables for the current mode.

        Training mode creates ``self.py_reader`` and reads ``image``,
        ``gtbox``, ``gtlabel`` and ``gtscore`` from it.  Inference mode
        declares the ``image``, ``im_shape`` and ``im_id`` data layers.
        """
        self.image_shape = [3, cfg.input_size, cfg.input_size]
        if self.is_train:
            self.py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + self.image_shape, [-1, cfg.max_box_num, 4],
                        [-1, cfg.max_box_num], [-1, cfg.max_box_num]],
                lod_levels=[0, 0, 0, 0],
                dtypes=['float32'] * 2 + ['int32'] + ['float32'],
                use_double_buffer=True)
            self.image, self.gtbox, self.gtlabel, self.gtscore = \
                fluid.layers.read_file(self.py_reader)
        else:
            self.image = fluid.layers.data(
                name='image', shape=self.image_shape, dtype='float32')
            self.im_shape = fluid.layers.data(
                name="im_shape", shape=[2], dtype='int32')
            self.im_id = fluid.layers.data(
                name="im_id", shape=[1], dtype='int32')

    def feeds(self):
        """Return the input variables created by ``build_input``.

        Returns:
            ``[image, im_id, im_shape]`` in inference mode, otherwise
            ``[image, gtbox, gtlabel, gtscore]``.
        """
        if not self.is_train:
            return [self.image, self.im_id, self.im_shape]
        return [self.image, self.gtbox, self.gtlabel, self.gtscore]

    def build_model(self):
        """Build inputs, detection heads and the loss / box-decoding ops.

        Fills ``self.outputs`` with one raw prediction per backbone block,
        then, per output, appends either a mean YOLOv3 loss to
        ``self.losses`` (training) or decoded boxes and transposed scores
        to ``self.boxes`` / ``self.scores`` (inference).
        """
        self.build_input()

        # Reset every result list so that calling build_model() again does
        # not accumulate entries from a previous build.
        self.outputs = []
        self.losses = []
        self.boxes = []
        self.scores = []

        is_test = not self.is_train
        blocks = add_DarkNet53_conv_body(self.image, is_test)
        last_index = len(blocks) - 1
        route = None
        for i, block in enumerate(blocks):
            if i > 0:
                # Merge the upsampled route of the previous scale with the
                # current backbone feature map.
                block = fluid.layers.concat(input=[route, block], axis=1)
            route, tip = yolo_detection_block(
                block,
                channel=512 // (2**i),
                is_test=is_test,
                name="yolo_block.{}".format(i))

            # out channel number = mask_num * (5 + class_num)
            num_filters = len(cfg.anchor_masks[i]) * (cfg.class_num + 5)
            block_out = fluid.layers.conv2d(
                input=tip,
                num_filters=num_filters,
                filter_size=1,
                stride=1,
                padding=0,
                act=None,
                param_attr=ParamAttr(
                    initializer=fluid.initializer.Normal(0., 0.02),
                    name="yolo_output.{}.conv.weights".format(i)),
                bias_attr=ParamAttr(
                    initializer=fluid.initializer.Constant(0.0),
                    regularizer=L2Decay(0.),
                    name="yolo_output.{}.conv.bias".format(i)))
            self.outputs.append(block_out)

            if i < last_index:
                route = conv_bn_layer(
                    input=route,
                    ch_out=256 // (2**i),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    is_test=is_test,
                    name="yolo_transition.{}".format(i))
                # upsample
                route = upsample(route)

        for i, out in enumerate(self.outputs):
            anchor_mask = cfg.anchor_masks[i]
            # Halve the ratio for each successive output without mutating
            # self.downsample, so the instance stays reusable.
            downsample = self.downsample // (2**i)

            if self.is_train:
                loss = fluid.layers.yolov3_loss(
                    x=out,
                    gt_box=self.gtbox,
                    gt_label=self.gtlabel,
                    gt_score=self.gtscore,
                    anchors=cfg.anchors,
                    anchor_mask=anchor_mask,
                    class_num=cfg.class_num,
                    ignore_thresh=cfg.ignore_thresh,
                    downsample_ratio=downsample,
                    use_label_smooth=bool(cfg.label_smooth),
                    name="yolo_loss" + str(i))
                self.losses.append(fluid.layers.reduce_mean(loss))
            else:
                # cfg.anchors is a flat list; entries 2*m and 2*m+1 belong
                # to anchor index m.
                mask_anchors = []
                for m in anchor_mask:
                    mask_anchors.append(cfg.anchors[2 * m])
                    mask_anchors.append(cfg.anchors[2 * m + 1])
                boxes, scores = fluid.layers.yolo_box(
                    x=out,
                    img_size=self.im_shape,
                    anchors=mask_anchors,
                    class_num=cfg.class_num,
                    conf_thresh=cfg.valid_thresh,
                    downsample_ratio=downsample,
                    name="yolo_box" + str(i))
                self.boxes.append(boxes)
                self.scores.append(
                    fluid.layers.transpose(
                        scores, perm=[0, 2, 1]))

    def loss(self):
        """Return the sum of all per-output losses."""
        return sum(self.losses)

    def get_pred(self):
        """Concatenate per-output boxes/scores and apply multiclass NMS.

        Boxes are concatenated along axis 1 and the (already transposed)
        scores along axis 2 before being passed to
        ``fluid.layers.multiclass_nms``.
        """
        yolo_boxes = fluid.layers.concat(self.boxes, axis=1)
        yolo_scores = fluid.layers.concat(self.scores, axis=2)
        return fluid.layers.multiclass_nms(
            bboxes=yolo_boxes,
            scores=yolo_scores,
            score_threshold=cfg.valid_thresh,
            nms_top_k=cfg.nms_topk,
            keep_top_k=cfg.nms_posk,
            nms_threshold=cfg.nms_thresh,
            background_label=-1,
            name="multiclass_nms")