# icnet.py (6.7 KB) - committed by wuzewu
# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from utils.config import cfg
from models.libs.model_libs import scope
from models.libs.model_libs import bn, avg_pool, conv
from models.backbone.resnet import ResNet as resnet_backbone
import numpy as np


def interp(input, out_shape):
    """Bilinearly resize ``input`` to the size described by ``out_shape``.

    Args:
        input: Variable forwarded to ``fluid.layers.resize_bilinear``.
        out_shape: Object exposing ``astype`` (numpy arrays at every call
            site in this file). It is cast to int32 and unpacked into a
            plain list before being handed to the resize op.

    Returns:
        Whatever ``fluid.layers.resize_bilinear`` returns for the request.
    """
    # The cast drops any fractional part; callers apply np.ceil or floor
    # division beforehand when they want a particular rounding.
    target_size = [dim for dim in out_shape.astype("int32")]
    return fluid.layers.resize_bilinear(input, out_shape=target_size)


def pyramis_pooling(input, input_shape):
    """Sum ``input`` with four average-pooled, re-upsampled copies of itself.

    Args:
        input: Feature map to pool. Its spatial size is assumed to equal
            ``ceil(input_shape / 32)`` -- TODO confirm against the backbone.
        input_shape: numpy array with the (height, width) of the network
            input image.

    Returns:
        ``input + level4 + level3 + level2 + level1``, where ``levelK`` is
        ``input`` average-pooled with kernel and stride
        ``(h // K, w // K)`` and resized back to ``(h, w)``.
    """
    grid = np.ceil(input_shape / 32).astype("int32")
    rows, cols = grid

    def _pool(divisor):
        # Kernel and stride are equal, so the pooling windows do not overlap.
        return avg_pool(input, [rows // divisor, cols // divisor],
                        [rows // divisor, cols // divisor])

    level1 = interp(_pool(1), grid)
    # The official caffe repo evaluates with these (kernel, stride) pairs
    # instead of the divisor-based ones used here:
    #   level 2: [17, 33], [16, 32]
    #   level 3: [13, 25], [10, 20]
    #   level 4: [8, 15],  [5, 10]
    coarse_pools = [_pool(divisor) for divisor in (2, 3, 4)]
    level2, level3, level4 = [interp(pooled, grid) for pooled in coarse_pools]

    fused = input
    for level in (level4, level3, level2, level1):
        fused = fused + level
    return fused


def zero_padding(input, padding):
    """Pad ``input`` via ``fluid.layers.pad`` with a symmetric margin.

    Args:
        input: Variable to pad.
        padding: Amount used for the last four entries of the paddings list;
            the first four entries are 0.

    Returns:
        The padded variable. The fill value is whatever ``fluid.layers.pad``
        uses by default, since none is passed here.
    """
    # Presumably laid out as (before, after) pairs per dimension, i.e. no
    # padding on the first two dims and `padding` on both sides of the last
    # two -- confirm against the fluid.layers.pad documentation.
    pad_widths = [0, 0, 0, 0] + [padding] * 4
    return fluid.layers.pad(input, pad_widths)


def sub_net_4(input, input_shape):
    """Build the sub4 branch: pyramid pooling, 1x1 conv + BN/ReLU, upsample.

    Args:
        input: Feature map fed to ``pyramis_pooling``.
        input_shape: numpy array with the (height, width) of the network
            input image.

    Returns:
        The branch output resized to ``ceil(input_shape / 16)``.
    """
    pooled = pyramis_pooling(input, input_shape)
    with scope("conv5_4_k1"):
        # conv arguments are presumably (num_filters, filter_size, stride)
        # -- confirm against models.libs.model_libs.conv.
        reduced = conv(pooled, 256, 1, 1)
        reduced = bn(reduced, act='relu')
    upsampled_size = np.ceil(input_shape / 16)
    return interp(reduced, out_shape=upsampled_size)


def sub_net_2(input):
    """Build the sub2 branch: a conv projection followed by batch norm.

    Args:
        input: Feature map to project.

    Returns:
        The batch-normalised projection; no activation is requested here.
    """
    with scope("conv3_1_sub2_proj"):
        # conv arguments are presumably (num_filters, filter_size, stride)
        # -- confirm against models.libs.model_libs.conv.
        projected = bn(conv(input, 128, 1, 1))
    return projected


def sub_net_1(input):
    """Build the sub1 branch: three conv+BN/ReLU stages, then a projection.

    Args:
        input: Variable fed to the first convolution.

    Returns:
        The batch-normalised output of the final projection; no activation
        is requested on the projection.
    """
    # (scope name, first positional conv argument) for each stage. Every
    # stage calls conv(x, n, 3, 2, padding=1); the positional arguments are
    # presumably (num_filters, filter_size, stride) -- confirm against
    # models.libs.model_libs.conv.
    stages = (
        ("conv1_sub1", 32),
        ("conv2_sub1", 32),
        ("conv3_sub1", 64),
    )
    feat = input
    for scope_name, channels in stages:
        with scope(scope_name):
            feat = conv(feat, channels, 3, 2, padding=1)
            feat = bn(feat, act='relu')

    with scope("conv3_sub1_proj"):
        feat = conv(feat, 128, 1, 1)
        feat = bn(feat)
    return feat


def CCF24(sub2_out, sub4_out, input_shape):
    """Fuse the sub4 and sub2 branch outputs and upsample the result.

    Args:
        sub2_out: Output of the sub2 branch; added to the convolved sub4
            features, so the two must have compatible shapes.
        sub4_out: Output of the sub4 branch.
        input_shape: numpy array with the (height, width) of the network
            input image.

    Returns:
        ``relu(bn(conv(sub4_out)) + sub2_out)`` resized to
        ``ceil(input_shape / 8)``.
    """
    with scope("conv_sub4"):
        refined = conv(sub4_out, 128, 3, dilation=2, padding=2)
        refined = bn(refined)
    fused = fluid.layers.relu(refined + sub2_out)
    target_size = np.ceil(input_shape / 8)
    return interp(fused, target_size)


def CCF124(sub1_out, sub24_out, input_shape):
    """Fuse the sub24 result with the sub1 branch output and upsample.

    Args:
        sub1_out: Output of the sub1 branch; added to the convolved sub24
            features, so the two must have compatible shapes.
        sub24_out: Output of ``CCF24``.
        input_shape: numpy array with the (height, width) of the network
            input image.

    Returns:
        ``relu(bn(conv(pad(sub24_out))) + sub1_out)`` resized to
        ``input_shape // 4``.
    """
    # Padding is applied as a separate op here; the convolution itself is
    # called without a padding argument.
    padded = zero_padding(sub24_out, padding=2)
    with scope("conv_sub2"):
        refined = conv(padded, 128, 3, dilation=2)
        refined = bn(refined)
    fused = fluid.layers.relu(refined + sub1_out)
    # NOTE: floor division here, whereas CCF24 and sub_net_4 round up with
    # np.ceil.
    return interp(fused, input_shape // 4)


def resnet(input):
    """Run the ICNet ResNet backbone and return its two tapped outputs.

    Args:
        input: Variable fed to the backbone.

    Returns:
        A pair ``(data, shortcut)``: the backbone's final output and the
        decode shortcut recorded at layer 13.
    """
    # ICNet backbone: resnet (resnet50 by default, per the original note).
    #   end_points:    layer at which the resnet stops
    #   decode_points: layer from which the backbone branch is taken
    #   resize_points: layer at which the backbone halves the conv size
    #   dilation_dict: resnet block index -> dilation rate of its convs
    branch_layer = 13
    backbone = resnet_backbone(
        scale=cfg.MODEL.ICNET.DEPTH_MULTIPLIER,
        layers=cfg.MODEL.ICNET.LAYERS,
        stem='icnet')
    features, shortcuts = backbone.net(
        input,
        end_points=49,
        decode_points=branch_layer,
        resize_points=branch_layer,
        dilation_dict={2: 2, 3: 4})
    return features, shortcuts[branch_layer]


def encoder(data13, data49, input, input_shape):
    """Build the three ICNet encoder branches.

    Args:
        data13: Backbone features from resnet layer 13 (sub2 branch input).
        data49: Backbone features from resnet layer 49 (sub4 branch input).
        input: The original-size image (sub1 branch input).
        input_shape: numpy array with the (height, width) of ``input``.

    Returns:
        The tuple ``(sub1_out, sub2_out, sub4_out)``.
    """
    # ICNet encoder configuration:
    #   sub_net_4: pyramid pooling on the resnet layer-49 features
    #   sub_net_2: convolution on the resnet layer-13 features
    #   sub_net_1: three downsampling convolutions on the original image
    branch4 = sub_net_4(data49, input_shape)
    branch2 = sub_net_2(data13)
    branch1 = sub_net_1(input)
    return branch1, branch2, branch4


def decoder(sub1_out, sub2_out, sub4_out, input_shape):
    """Fuse the three encoder branches with cascade feature fusion (CCF).

    Args:
        sub1_out: Output of the sub1 branch.
        sub2_out: Output of the sub2 branch.
        sub4_out: Output of the sub4 branch.
        input_shape: numpy array with the (height, width) of the network
            input image.

    Returns:
        The tuple ``(sub24_out, sub124_out)``: the sub2/sub4 fusion and the
        fusion of that result with sub1.
    """
    # ICNet decoder configuration: sub4 and sub2 are fused first, then the
    # result is fused with sub1.
    fused24 = CCF24(sub2_out, sub4_out, input_shape)
    fused124 = CCF124(sub1_out, fused24, input_shape)
    return fused24, fused124


def get_logit(data, num_classes, name="logit"):
    """Apply the classification convolution that produces per-class logits.

    Args:
        data: Feature map to classify.
        num_classes: Passed to ``conv`` as its second positional argument
            (presumably the number of output channels -- confirm against
            models.libs.model_libs.conv).
        name: Scope name for the layer; the weight parameter is named
            ``name + 'weights'``.

    Returns:
        The output of the convolution.
    """
    # L2 decay with a zero coefficient, and truncated-normal initialisation.
    weight_decay = fluid.regularizer.L2DecayRegularizer(
        regularization_coeff=0.0)
    weight_init = fluid.initializer.TruncatedNormal(loc=0.0, scale=0.01)
    weight_attr = fluid.ParamAttr(
        name=name + 'weights',
        regularizer=weight_decay,
        initializer=weight_init)

    with scope(name):
        logits = conv(
            data,
            num_classes,
            1,
            stride=1,
            padding=0,
            param_attr=weight_attr,
            bias_attr=True)
    return logits


def icnet(input, num_classes):
    """Assemble the ICNet graph and return its three logit outputs.

    Args:
        input: Image variable; ``input.shape[2:]`` is taken as the
            (height, width) of the image.
        num_classes: Number of classes handed to each ``get_logit`` head.

    Returns:
        The tuple ``(logit124, logit24, logit4)`` computed from the
        sub1+sub2+sub4 fusion, the sub2+sub4 fusion and the sub4 branch
        respectively.
    """
    # Kept as float32 so the branches can divide and round it freely.
    input_shape = np.array(input.shape[2:]).astype("float32")

    # Backbone resnet:
    #   input : the image resized to 1/2 of its size
    #   output: data49 - resnet layer-49 features, 1/32 of the original size
    #           data13 - resnet layer-13 features, 1/16 of the original size
    half_size = np.ceil(input_shape * 0.5)
    image_sub2 = interp(input, out_shape=half_size)
    data49, data13 = resnet(image_sub2)

    # Encoder: downsampling convolutions on `input`, a convolution on
    # data13 and pyramid pooling on data49, giving sub1_out, sub2_out and
    # sub4_out respectively.
    sub1_out, sub2_out, sub4_out = encoder(data13, data49, input, input_shape)

    # Decoder: cascade feature fusion over the three encoder branches.
    sub24_out, sub124_out = decoder(sub1_out, sub2_out, sub4_out, input_shape)

    # Logit heads: the final convolution's output depends on num_classes.
    # They are created in the order 124, 4, 24 and returned as 124, 24, 4.
    head124 = get_logit(sub124_out, num_classes, "logit124")
    head4 = get_logit(sub4_out, num_classes, "logit4")
    head24 = get_logit(sub24_out, num_classes, "logit24")
    return head124, head24, head4


if __name__ == '__main__':
    # Smoke test: build the graph for a 3x320x320 image with 4 classes and
    # print the shape of every output head.
    image_shape = [3, 320, 320]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    # icnet returns a 3-tuple (logit124, logit24, logit4); a tuple has no
    # `.shape`, so each head is reported individually.
    logits = icnet(image, 4)
    for head_name, logit in zip(("logit124", "logit24", "logit4"), logits):
        print(head_name + ":", logit.shape)