diff --git a/models/tsm.py b/models/tsm.py new file mode 100644 index 0000000000000000000000000000000000000000..91acd16b288e7e0803e0448f0e93a484b0b92c17 --- /dev/null +++ b/models/tsm.py @@ -0,0 +1,204 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import math +import paddle.fluid as fluid +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear + +from model import Model +from .download import get_weights_path + +__all__ = ["TSM_ResNet", "tsm_resnet50"] + +# {num_layers: (url, md5)} +pretrain_infos = { + 50: ('https://paddlemodels.bj.bcebos.com/hapi/tsm_resnet50.pdparams', + '5755dc538e422589f417f7b38d7cc3c7') +} + + +class ConvBNLayer(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + filter_size, + stride=1, + groups=1, + act=None): + super(ConvBNLayer, self).__init__() + + self._conv = Conv2D( + num_channels=num_channels, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=None, + act=None, + param_attr=fluid.param_attr.ParamAttr(), + bias_attr=False) + + self._batch_norm = BatchNorm( + num_filters, + act=act, + param_attr=fluid.param_attr.ParamAttr(), + bias_attr=fluid.param_attr.ParamAttr()) + + def forward(self, inputs): + y = self._conv(inputs) + y = self._batch_norm(y) + + return y + + +class BottleneckBlock(fluid.dygraph.Layer): + def __init__(self, + num_channels, + num_filters, + stride, + shortcut=True, + seg_num=8): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters, + filter_size=1, + act='relu') + self.conv1 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu') + self.conv2 = ConvBNLayer( + num_channels=num_filters, + num_filters=num_filters * 4, + filter_size=1, + act=None) + + if not shortcut: + self.short = ConvBNLayer( + num_channels=num_channels, + num_filters=num_filters * 4, + filter_size=1, + stride=stride) + self.shortcut = shortcut + self.seg_num = seg_num + self._num_channels_out = int(num_filters * 4) + + def forward(self, inputs): + shifts = fluid.layers.temporal_shift(inputs, self.seg_num, 1.0 / 8) + y = self.conv0(shifts) + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = fluid.layers.elementwise_add(x=short, y=conv2, act="relu") + return y + + +class TSM_ResNet(Model): + """ + TSM network with ResNet as backbone + + Args: + num_layers (int): ResNet layer number, only support 50 currently. + Default 50. + seg_num (int): segment number of each video sample. Default 8. + num_classes (int): video class number. Default 400. + """ + def __init__(self, num_layers=50, seg_num=8, num_classes=400): + super(TSM_ResNet, self).__init__() + + self.layers = num_layers + self.seg_num = seg_num + self.class_dim = num_classes + + if self.layers == 50: + depth = [3, 4, 6, 3] + else: + raise NotImplementedError + num_filters = [64, 128, 256, 512] + + self.conv = ConvBNLayer( + num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu') + self.pool2d_max = Pool2D( + pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + + self.bottleneck_block_list = [] + num_channels = 64 + + for block in range(len(depth)): + shortcut = False + for i in range(depth[block]): + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + BottleneckBlock( + num_channels=num_channels, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + shortcut=shortcut, + seg_num=self.seg_num)) + num_channels = int(bottleneck_block._num_channels_out) + self.bottleneck_block_list.append(bottleneck_block) + shortcut = True + self.pool2d_avg = Pool2D( + pool_size=7, pool_type='avg', global_pooling=True) + + stdv = 1.0 / math.sqrt(2048 * 1.0) + + self.out = Linear( + 2048, + self.class_dim, + act="softmax", + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=fluid.param_attr.ParamAttr( + learning_rate=2.0, regularizer=fluid.regularizer.L2Decay(0.))) + + def forward(self, inputs): + y = fluid.layers.reshape( + inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]]) + y = self.conv(y) + y = self.pool2d_max(y) + for bottleneck_block in self.bottleneck_block_list: + y = bottleneck_block(y) + y = self.pool2d_avg(y) + y = fluid.layers.dropout(y, dropout_prob=0.5) + y = fluid.layers.reshape(y, [-1, self.seg_num, y.shape[1]]) + y = fluid.layers.reduce_mean(y, dim=1) + y = fluid.layers.reshape(y, shape=[-1, 2048]) + y = self.out(y) + return y + + +def _tsm_resnet(num_layers, seg_num=8, num_classes=400, pretrained=True): + model = TSM_ResNet(num_layers, seg_num, num_classes) + if pretrained: + assert num_layers in pretrain_infos.keys(), \ + "TSM-ResNet{} do not have pretrained weights now, " \ + "pretrained should be set as False".format(num_layers) + weight_path = get_weights_path(*(pretrain_infos[num_layers])) + assert weight_path.endswith('.pdparams'), \ + "suffix of weight must be .pdparams" + model.load(weight_path[:-9]) + return model + + +def tsm_resnet50(seg_num=8, num_classes=400, pretrained=True): + return _tsm_resnet(50, seg_num, num_classes, pretrained) diff --git a/yolov3/dataset/download_voc.py b/yolov3/dataset/download_voc.py index 0d4e3cf368ca446f989f19449adf2775d741fe7f..8b064ed4034e5fa1471c8094a78266d531d9c111 100644 --- a/yolov3/dataset/download_voc.py +++ b/yolov3/dataset/download_voc.py @@ -17,7 +17,7 @@ import os.path as osp import sys import tarfile -from download import _download +from models.download import _download import logging logger = logging.getLogger(__name__)