# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn.functional as F
from paddle import ParamAttr
import numpy as np
import math

from bmn_utils import get_interp1d_mask

DATATYPE = 'float32'


# Net
class Conv1D(paddle.nn.Layer):
    def __init__(self,
                 prefix,
                 num_channels=256,
                 num_filters=256,
                 size_k=3,
                 padding=1,
                 groups=1,
                 act="relu"):
        super(Conv1D, self).__init__()
        fan_in = num_channels * size_k * 1
        k = 1. / math.sqrt(fan_in)
        param_attr = ParamAttr(
            name=prefix + "_w",
            initializer=paddle.nn.initializer.Uniform(
                low=-k, high=k))
        bias_attr = ParamAttr(
            name=prefix + "_b",
            initializer=paddle.nn.initializer.Uniform(
                low=-k, high=k))

        self._conv2d = paddle.nn.Conv2d(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=(1, size_k),
            stride=1,
            padding=(0, padding),
            groups=groups,
            weight_attr=param_attr,
            bias_attr=bias_attr)
        if act == "relu":
            self._act = paddle.nn.ReLU()
        elif act == "sigmoid":
            self._act = paddle.nn.Sigmoid()
        else:
            raise ValueError("unsupported activation: {}".format(act))

    def forward(self, x):
        x = paddle.unsqueeze(x, axis=[2])
        x = self._conv2d(x)
        x = self._act(x)
        x = paddle.squeeze(x, axis=[2])
        return x
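
# Illustrative usage (a sketch, not part of the original file): Conv1D emulates
# a 1-D convolution by inserting a dummy height axis, applying Conv2d with a
# (1, size_k) kernel, and squeezing the axis back out. For a [N, C, T] input:
#
#   conv = Conv1D(prefix="demo", num_channels=400, num_filters=256)
#   y = conv(paddle.rand([4, 400, 100]))  # y.shape -> [4, 256, 100]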


class BMN(paddle.nn.Layer):
    def __init__(self, cfg):
        super(BMN, self).__init__()

        # init config
        self.tscale = cfg.MODEL.tscale
        self.dscale = cfg.MODEL.dscale
        self.prop_boundary_ratio = cfg.MODEL.prop_boundary_ratio
        self.num_sample = cfg.MODEL.num_sample
        self.num_sample_perbin = cfg.MODEL.num_sample_perbin

        self.hidden_dim_1d = 256
        self.hidden_dim_2d = 128
        self.hidden_dim_3d = 512

        # Base Module
        self.b_conv1 = Conv1D(
            prefix="Base_1",
            num_channels=400,
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")
        self.b_conv2 = Conv1D(
            prefix="Base_2",
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")

        # Temporal Evaluation Module
        self.ts_conv1 = Conv1D(
            prefix="TEM_s1",
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")
        self.ts_conv2 = Conv1D(
            prefix="TEM_s2", num_filters=1, size_k=1, padding=0, act="sigmoid")
        self.te_conv1 = Conv1D(
            prefix="TEM_e1",
            num_filters=self.hidden_dim_1d,
            size_k=3,
            padding=1,
            groups=4,
            act="relu")
        self.te_conv2 = Conv1D(
            prefix="TEM_e2", num_filters=1, size_k=1, padding=0, act="sigmoid")

        # Proposal Evaluation Module
        self.p_conv1 = Conv1D(
            prefix="PEM_1d",
            num_filters=self.hidden_dim_2d,
            size_k=3,
            padding=1,
            act="relu")

        # Precompute the boundary-matching sampling mask once at init to speed up training
        sample_mask = get_interp1d_mask(self.tscale, self.dscale,
                                        self.prop_boundary_ratio,
                                        self.num_sample, self.num_sample_perbin)
        self.sample_mask = paddle.to_tensor(sample_mask)
        self.sample_mask.stop_gradient = True

        self.p_conv3d1 = paddle.nn.Conv3d(
            in_channels=128,
            out_channels=self.hidden_dim_3d,
            kernel_size=(self.num_sample, 1, 1),
            stride=(self.num_sample, 1, 1),
            padding=0,
            weight_attr=ParamAttr(name="PEM_3d1_w"),
            bias_attr=ParamAttr(name="PEM_3d1_b"))
        self.p_conv3d1_act = paddle.nn.ReLU()

        self.p_conv2d1 = paddle.nn.Conv2d(
            in_channels=512,
            out_channels=self.hidden_dim_2d,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name="PEM_2d1_w"),
            bias_attr=ParamAttr(name="PEM_2d1_b"))
        self.p_conv2d1_act = paddle.nn.ReLU()

        self.p_conv2d2 = paddle.nn.Conv2d(
            in_channels=128,
            out_channels=self.hidden_dim_2d,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(name="PEM_2d2_w"),
            bias_attr=ParamAttr(name="PEM_2d2_b"))
        self.p_conv2d2_act = paddle.nn.ReLU()

        self.p_conv2d3 = paddle.nn.Conv2d(
            in_channels=128,
            out_channels=self.hidden_dim_2d,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(name="PEM_2d3_w"),
            bias_attr=ParamAttr(name="PEM_2d3_b"))
        self.p_conv2d3_act = paddle.nn.ReLU()

        self.p_conv2d4 = paddle.nn.Conv2d(
            in_channels=128,
            out_channels=2,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name="PEM_2d4_w"),
            bias_attr=ParamAttr(name="PEM_2d4_b"))
        self.p_conv2d4_act = paddle.nn.Sigmoid()

    def forward(self, x):
        # Base Module
        x = self.b_conv1(x)
        x = self.b_conv2(x)

        # TEM
        xs = self.ts_conv1(x)
        xs = self.ts_conv2(xs)
        xs = paddle.squeeze(xs, axis=[1])
        xe = self.te_conv1(x)
        xe = self.te_conv2(xe)
        xe = paddle.squeeze(xe, axis=[1])

        # PEM
        xp = self.p_conv1(x)
        # BM layer: sample temporal features for every (duration, start) proposal
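        # Shape sketch (assuming the mask layout from get_interp1d_mask;
        # illustrative): xp enters as [N, 128, T]; sample_mask is
        # [T, num_sample * D * T], so the matmul yields
        # [N, 128, num_sample * D * T], which the reshape unflattens into
        # [N, 128, num_sample, D, T] (0 keeps an existing dim, -1 is inferred).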
        xp = paddle.matmul(xp, self.sample_mask)
        xp = paddle.reshape(xp, shape=[0, 0, -1, self.dscale, self.tscale])

        xp = self.p_conv3d1(xp)
        xp = self.p_conv3d1_act(xp)
        xp = paddle.squeeze(xp, axis=[2])
        xp = self.p_conv2d1(xp)
        xp = self.p_conv2d1_act(xp)
        xp = self.p_conv2d2(xp)
        xp = self.p_conv2d2_act(xp)
        xp = self.p_conv2d3(xp)
        xp = self.p_conv2d3_act(xp)
        xp = self.p_conv2d4(xp)
        xp = self.p_conv2d4_act(xp)
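        # Output shapes (sketch, added for clarity): xp is the BM confidence
        # map [N, 2, D, T] (regression and classification channels); xs and
        # xe are per-position start/end probabilities, each of shape [N, T].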
        return xp, xs, xe


def bmn_loss_func(pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end,
                  cfg):
    def _get_mask(cfg):
        dscale = cfg.MODEL.dscale
        tscale = cfg.MODEL.tscale
        bm_mask = []
        for idx in range(dscale):
            mask_vector = [1] * (tscale - idx) + [0] * idx
            bm_mask.append(mask_vector)
        bm_mask = np.array(bm_mask, dtype=np.float32)
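        # Example (illustrative): with dscale=3, tscale=4 the mask is
        #   [[1, 1, 1, 1],
        #    [1, 1, 1, 0],
        #    [1, 1, 0, 0]]
        # i.e. row idx zeroes out proposals whose duration would run past
        # the end of the sequence.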
        bm_mask = paddle.to_tensor(bm_mask)
        bm_mask.stop_gradient = True
        return bm_mask

    def tem_loss_func(pred_start, pred_end, gt_start, gt_end):
        def bi_loss(pred_score, gt_label):
            pred_score = paddle.reshape(x=pred_score, shape=[-1])
            gt_label = paddle.reshape(x=gt_label, shape=[-1])
            gt_label.stop_gradient = True
            pmask = paddle.cast(x=(gt_label > 0.5), dtype=DATATYPE)
            num_entries = paddle.cast(paddle.shape(pmask), dtype=DATATYPE)
            num_positive = paddle.cast(paddle.reduce_sum(pmask), dtype=DATATYPE)
            ratio = num_entries / num_positive
            coef_0 = 0.5 * ratio / (ratio - 1)
            coef_1 = 0.5 * ratio
            epsilon = 0.000001
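            # Why these coefficients (a brief derivation, not in the original):
            # with P positives among N entries and ratio = N / P,
            #   coef_1 * mean(log(p) * pmask)           ~ 0.5 * avg positive term
            #   coef_0 * mean(log(1 - p) * (1 - pmask)) ~ 0.5 * avg negative term
            # so both classes contribute equally despite the class imbalance.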
            loss_pos = paddle.multiply(paddle.log(pred_score + epsilon), pmask)
            loss_pos = coef_1 * paddle.reduce_mean(loss_pos)
            loss_neg = paddle.multiply(
                paddle.log(1.0 - pred_score + epsilon), (1.0 - pmask))
            loss_neg = coef_0 * paddle.reduce_mean(loss_neg)
            loss = -1 * (loss_pos + loss_neg)
            return loss

        loss_start = bi_loss(pred_start, gt_start)
        loss_end = bi_loss(pred_end, gt_end)
        loss = loss_start + loss_end
        return loss

    def pem_reg_loss_func(pred_score, gt_iou_map, mask):
        gt_iou_map = paddle.multiply(gt_iou_map, mask)

        u_hmask = paddle.cast(x=gt_iou_map > 0.7, dtype=DATATYPE)
        u_mmask = paddle.logical_and(gt_iou_map <= 0.7, gt_iou_map > 0.3)
        u_mmask = paddle.cast(x=u_mmask, dtype=DATATYPE)
        u_lmask = paddle.logical_and(gt_iou_map <= 0.3, gt_iou_map >= 0.)
        u_lmask = paddle.cast(x=u_lmask, dtype=DATATYPE)
        u_lmask = paddle.multiply(u_lmask, mask)

        num_h = paddle.cast(paddle.reduce_sum(u_hmask), dtype=DATATYPE)
        num_m = paddle.cast(paddle.reduce_sum(u_mmask), dtype=DATATYPE)
        num_l = paddle.cast(paddle.reduce_sum(u_lmask), dtype=DATATYPE)

        r_m = num_h / num_m
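        # Random-sampling sketch (comments added for clarity): each
        # medium-IoU cell is kept with probability r_m (its uniform noise
        # must exceed 1 - r_m), so the expected number of sampled medium
        # cells matches num_h; the same trick balances the low-IoU band
        # below with r_l.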
        u_smmask = paddle.uniform(
            shape=[gt_iou_map.shape[1], gt_iou_map.shape[2]],
            dtype=DATATYPE,
            min=0.0,
            max=1.0)
        u_smmask = paddle.multiply(u_mmask, u_smmask)
        u_smmask = paddle.cast(x=(u_smmask > (1. - r_m)), dtype=DATATYPE)

        r_l = num_h / num_l
        u_slmask = paddle.uniform(
            shape=[gt_iou_map.shape[1], gt_iou_map.shape[2]],
            dtype=DATATYPE,
            min=0.0,
            max=1.0)
        u_slmask = paddle.multiply(u_lmask, u_slmask)
        u_slmask = paddle.cast(x=(u_slmask > (1. - r_l)), dtype=DATATYPE)

        weights = u_hmask + u_smmask + u_slmask
        weights.stop_gradient = True
        loss = F.square_error_cost(pred_score, gt_iou_map)
        loss = paddle.multiply(loss, weights)
        loss = 0.5 * paddle.reduce_sum(loss) / paddle.reduce_sum(weights)

        return loss

    def pem_cls_loss_func(pred_score, gt_iou_map, mask):
        gt_iou_map = paddle.multiply(gt_iou_map, mask)
        gt_iou_map.stop_gradient = True
        pmask = paddle.cast(x=(gt_iou_map > 0.9), dtype=DATATYPE)
        nmask = paddle.cast(x=(gt_iou_map <= 0.9), dtype=DATATYPE)
        nmask = paddle.multiply(nmask, mask)

        num_positive = paddle.reduce_sum(pmask)
        num_entries = num_positive + paddle.reduce_sum(nmask)
        ratio = num_entries / num_positive
        coef_0 = 0.5 * ratio / (ratio - 1)
        coef_1 = 0.5 * ratio
        epsilon = 0.000001
        loss_pos = paddle.multiply(paddle.log(pred_score + epsilon), pmask)
        loss_pos = coef_1 * paddle.reduce_sum(loss_pos)
        loss_neg = paddle.multiply(
            paddle.log(1.0 - pred_score + epsilon), nmask)
        loss_neg = coef_0 * paddle.reduce_sum(loss_neg)
        loss = -1 * (loss_pos + loss_neg) / num_entries
        return loss

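    # pred_bm is [N, 2, D, T]; channel 0 holds the regression map and
    # channel 1 the classification map (comment added for clarity).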
    pred_bm_reg = paddle.squeeze(
        paddle.slice(
            pred_bm, axes=[1], starts=[0], ends=[1]), axis=[1])
    pred_bm_cls = paddle.squeeze(
        paddle.slice(
            pred_bm, axes=[1], starts=[1], ends=[2]), axis=[1])

    bm_mask = _get_mask(cfg)

    pem_reg_loss = pem_reg_loss_func(pred_bm_reg, gt_iou_map, bm_mask)
    pem_cls_loss = pem_cls_loss_func(pred_bm_cls, gt_iou_map, bm_mask)

    tem_loss = tem_loss_func(pred_start, pred_end, gt_start, gt_end)

    loss = tem_loss + 10 * pem_reg_loss + pem_cls_loss
    return loss, tem_loss, pem_reg_loss, pem_cls_loss
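

# Illustrative end-to-end usage (a sketch; the config stand-in and its values
# are assumptions, not part of this file):
#
#   class _Cfg:  # hypothetical minimal stand-in for the real config object
#       class MODEL:
#           tscale, dscale = 100, 100
#           prop_boundary_ratio = 0.5
#           num_sample, num_sample_perbin = 32, 3
#
#   model = BMN(_Cfg)
#   pred_bm, pred_start, pred_end = model(paddle.rand([4, 400, 100]))
#   # gt_iou_map: [4, dscale, tscale]; gt_start / gt_end: [4, tscale]
#   loss, tem, pem_reg, pem_cls = bmn_loss_func(
#       pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, _Cfg)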