# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#   
# Licensed under the Apache License, Version 2.0 (the "License");   
# you may not use this file except in compliance with the License.  
# You may obtain a copy of the License at   
#   
#     http://www.apache.org/licenses/LICENSE-2.0    
#   
# Unless required by applicable law or agreed to in writing, software   
# distributed under the License is distributed on an "AS IS" BASIS, 
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  
# See the License for the specific language governing permissions and   
# limitations under the License.

import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform

from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ..shape_spec import ShapeSpec

__all__ = ['FPN']


@register
@serializable
class FPN(nn.Layer):
    """
    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144

    Args:
        in_channels (list[int]): input channels of each level which can be
            derived from the output shape of backbone by from_config
        out_channel (int): output channel shared by every pyramid level
        spatial_scales (list[float]): the spatial scales between input feature
            maps and original input image which can be derived from the output
            shape of backbone by from_config. One extra scale (half of the
            previous one) is appended per extra stage.
        has_extra_convs (bool): whether to add extra conv to the last level.
            default False
        extra_stage (int): the number of extra stages added to the last level.
            default 1
        use_c5 (bool): Whether to use c5 as the input of extra stage,
            otherwise p5 is used. default True
        norm_type (string|None): The normalization type in FPN module. If
            norm_type is None, norm will not be used after conv and if
            norm_type is string, bn, gn, sync_bn are available. default None
        norm_decay (float): weight decay for normalization layer weights.
            default 0.
        freeze_norm (bool): whether to freeze normalization layer.
            default False
        relu_before_extra_convs (bool): whether to add relu before extra convs.
            default True

    """

    # NOTE: the list default of `spatial_scales` is kept as-is because the
    # signature is part of the public (config-driven) interface; it is copied
    # in __init__ and never mutated.
    def __init__(self,
                 in_channels,
                 out_channel,
                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
                 has_extra_convs=False,
                 extra_stage=1,
                 use_c5=True,
                 norm_type=None,
                 norm_decay=0.,
                 freeze_norm=False,
                 relu_before_extra_convs=True):
        super(FPN, self).__init__()
        self.out_channel = out_channel
        # Work on a private copy so that neither the caller's sequence nor the
        # shared default list is aliased; this also accepts tuples.
        spatial_scales = list(spatial_scales)
        for _ in range(extra_stage):
            spatial_scales.append(spatial_scales[-1] / 2.)
        self.spatial_scales = spatial_scales
        self.has_extra_convs = has_extra_convs
        self.extra_stage = extra_stage
        self.use_c5 = use_c5
        self.relu_before_extra_convs = relu_before_extra_convs
        self.norm_type = norm_type
        self.norm_decay = norm_decay
        self.freeze_norm = freeze_norm

        # Plain Python lists: the layers themselves are registered through
        # add_sublayer (inside _make_conv), these lists only keep call order.
        self.lateral_convs = []
        self.fpn_convs = []
        fan = out_channel * 3 * 3

        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
        # 0 <= st_stage < ed_stage <= 3
        st_stage = 4 - len(in_channels)
        ed_stage = st_stage + len(in_channels) - 1
        for i in range(st_stage, ed_stage + 1):
            if i == 3:
                lateral_name = 'fpn_inner_res5_sum'
            else:
                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
            in_c = in_channels[i - st_stage]
            # 1x1 lateral conv: projects the backbone feature to out_channel.
            self.lateral_convs.append(
                self._make_conv(
                    lateral_name,
                    ch_in=in_c,
                    kernel_size=1,
                    stride=1,
                    fan_out=in_c))

            fpn_name = 'fpn_res{}_sum'.format(i + 2)
            # 3x3 output conv applied to the merged (lateral + top-down) map.
            self.fpn_convs.append(
                self._make_conv(
                    fpn_name,
                    ch_in=out_channel,
                    kernel_size=3,
                    stride=1,
                    fan_out=fan))

        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
        if self.has_extra_convs:
            for i in range(self.extra_stage):
                lvl = ed_stage + 1 + i
                # Only the first extra conv may read the raw backbone output
                # (c5); all later ones consume a previous FPN output.
                if i == 0 and self.use_c5:
                    in_c = in_channels[-1]
                else:
                    in_c = out_channel
                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
                # Stride-2 3x3 conv: each extra level halves the resolution.
                self.fpn_convs.append(
                    self._make_conv(
                        extra_fpn_name,
                        ch_in=in_c,
                        kernel_size=3,
                        stride=2,
                        fan_out=fan))

    def _make_conv(self, name, ch_in, kernel_size, stride, fan_out):
        """
        Build one conv producing ``self.out_channel`` channels and register it.

        A ConvNormLayer is used when ``self.norm_type`` is set, otherwise a
        plain nn.Conv2D. Weights are initialized with XavierUniform using the
        given ``fan_out``.

        Args:
            name (str): sublayer name passed to add_sublayer
            ch_in (int): number of input channels
            kernel_size (int): square kernel size (1 or 3 in this module)
            stride (int): conv stride
            fan_out (int): fan_out value handed to XavierUniform

        Returns:
            The value returned by ``self.add_sublayer`` for the new layer.
        """
        initializer = XavierUniform(fan_out=fan_out)
        if self.norm_type is not None:
            conv = ConvNormLayer(
                ch_in=ch_in,
                ch_out=self.out_channel,
                filter_size=kernel_size,
                stride=stride,
                norm_type=self.norm_type,
                norm_decay=self.norm_decay,
                freeze_norm=self.freeze_norm,
                initializer=initializer)
        else:
            conv = nn.Conv2D(
                in_channels=ch_in,
                out_channels=self.out_channel,
                kernel_size=kernel_size,
                stride=stride,
                # 0 for the 1x1 conv, 1 for the 3x3 convs.
                padding=(kernel_size - 1) // 2,
                weight_attr=ParamAttr(initializer=initializer))
        return self.add_sublayer(name, conv)

    @classmethod
    def from_config(cls, cfg, input_shape):
        """
        Derive constructor arguments from the backbone output shapes.

        Args:
            cfg: configuration object (not used here)
            input_shape: iterable of objects exposing ``channels`` and
                ``stride`` attributes

        Returns:
            dict: ``in_channels`` and ``spatial_scales`` (1 / stride) entries.
        """
        return {
            'in_channels': [i.channels for i in input_shape],
            'spatial_scales': [1.0 / i.stride for i in input_shape],
        }

    def forward(self, body_feats):
        """
        Build the feature pyramid from backbone features.

        Args:
            body_feats (list): backbone feature maps, one per entry of
                ``in_channels``; each level is presumably half the resolution
                of the previous one, since the top-down path upsamples by 2
                (confirm against the backbone).

        Returns:
            list: FPN outputs for every input level, followed by the extra
            levels when ``extra_stage > 0``.
        """
        laterals = []
        num_levels = len(body_feats)
        for i in range(num_levels):
            laterals.append(self.lateral_convs[i](body_feats[i]))

        # Top-down pathway: walk from the last level to the first, adding the
        # 2x nearest-neighbour upsampled map into the level before it.
        for i in range(1, num_levels):
            lvl = num_levels - i
            upsample = F.interpolate(
                laterals[lvl],
                scale_factor=2.,
                mode='nearest', )
            laterals[lvl - 1] += upsample

        fpn_output = []
        for lvl in range(num_levels):
            fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))

        if self.extra_stage > 0:
            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
            if not self.has_extra_convs:
                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
                # kernel size 1 with stride 2: plain 2x subsampling.
                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
            else:
                if self.use_c5:
                    extra_source = body_feats[-1]
                else:
                    extra_source = fpn_output[-1]
                # Extra convs are stored in fpn_convs right after the
                # num_levels regular output convs.
                fpn_output.append(self.fpn_convs[num_levels](extra_source))

                for i in range(1, self.extra_stage):
                    if self.relu_before_extra_convs:
                        fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
                            fpn_output[-1])))
                    else:
                        fpn_output.append(self.fpn_convs[num_levels + i](
                            fpn_output[-1]))
        return fpn_output

    @property
    def out_shape(self):
        """
        Output shape of every pyramid level, extra stages included.

        Returns:
            list[ShapeSpec]: one entry per value in ``self.spatial_scales``,
            with ``channels=self.out_channel`` and ``stride=1 / scale``.
        """
        return [
            ShapeSpec(
                channels=self.out_channel, stride=1. / s)
            for s in self.spatial_scales
        ]