diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt index f71e04c09aa38b8cf7b3a167b84d4dc0e6cc3ec7..9f57b4ee4a94c97090ea7dd763144350ae3bb85b 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt @@ -4,3 +4,5 @@ string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") foreach(TEST_OP ${TEST_OPS}) py_test_modules(${TEST_OP} MODULES ${TEST_OP}) endforeach(TEST_OP) + +set_tests_properties(test_tsm PROPERTIES TIMEOUT 900) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py new file mode 100644 index 0000000000000000000000000000000000000000..13a97fb7478db8acaa46db3b7a6c4341997193eb --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_tsm.py @@ -0,0 +1,348 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
"""Dygraph-to-static consistency test for a TSM (ResNet-50 backbone) model.

The model is trained for a few iterations on fake data twice, once with
ProgramTranslator disabled and once with it enabled, and the recorded
loss/accuracy values of both runs are compared.
"""

import argparse
import math
import numpy as np
import os
import random
import sys
import time
import unittest

import paddle.fluid as fluid
from paddle.fluid.dygraph import declarative, ProgramTranslator, to_variable
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm, Linear, Pool2D
from paddle.fluid.layer_helper import LayerHelper
from tsm_config_utils import merge_configs, parse_config, print_configs

# Seed both RNGs at import time so the fake data is reproducible.
random.seed(0)
np.random.seed(0)


def _str2bool(value):
    """Convert a command-line string to bool for argparse.

    ``type=bool`` is wrong for argparse because ``bool("False")`` is True;
    any non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(
        'boolean value expected, got {!r}'.format(value))


def parse_args():
    """Build the argument namespace used by the test.

    A fixed argument list is parsed instead of ``sys.argv``, so options
    passed to the test runner are never interpreted here.

    Returns:
        argparse.Namespace with ``config`` and ``use_gpu``.
    """
    parser = argparse.ArgumentParser("Paddle Video train script")
    parser.add_argument(
        '--config',
        type=str,
        default='tsm.yaml',
        help='path to config file of model')
    parser.add_argument(
        '--use_gpu',
        type=_str2bool,
        default=fluid.is_compiled_with_cuda(),
        help='default use gpu.')
    args = parser.parse_args(['--config', 'tsm.yaml'])
    return args


class ConvBNLayer(fluid.dygraph.Layer):
    """Conv2D (no bias, no activation) followed by BatchNorm.

    Args:
        num_channels: number of input channels of the convolution.
        num_filters: number of output channels.
        filter_size: convolution kernel size; padding is
            ``(filter_size - 1) // 2``.
        stride: convolution stride.
        groups: convolution groups, forwarded to Conv2D.
        act: activation applied by the BatchNorm layer, or None.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            # Previously hard-coded to None, which silently ignored the
            # ``groups`` argument of this layer.
            groups=groups,
            act=None,
            param_attr=fluid.param_attr.ParamAttr(),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=fluid.param_attr.ParamAttr(),
            bias_attr=fluid.param_attr.ParamAttr())

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)

        return y


class BottleneckBlock(fluid.dygraph.Layer):
    """Residual bottleneck block whose input first goes through temporal_shift.

    Args:
        num_channels: number of input channels.
        num_filters: base filter count; the block outputs ``num_filters * 4``
            channels.
        stride: stride of the 3x3 convolution (and of the projection branch).
        shortcut: if True the residual branch is the unmodified input,
            otherwise a 1x1 ConvBNLayer projection is used.
        seg_num: segment count passed to ``fluid.layers.temporal_shift``.
    """

    def __init__(self,
                 num_channels,
                 num_filters,
                 stride,
                 shortcut=True,
                 seg_num=8):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters,
            filter_size=3,
            stride=stride,
            act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters,
            num_filters=num_filters * 4,
            filter_size=1,
            act=None)

        if not shortcut:
            # Projection branch so the residual matches the main branch's
            # channel count and stride.
            self.short = ConvBNLayer(
                num_channels=num_channels,
                num_filters=num_filters * 4,
                filter_size=1,
                stride=stride)
        self.shortcut = shortcut
        self.seg_num = seg_num
        self._num_channels_out = int(num_filters * 4)

    def forward(self, inputs):
        shifts = fluid.layers.temporal_shift(inputs, self.seg_num, 1.0 / 8)
        y = self.conv0(shifts)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)
        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = fluid.layers.elementwise_add(x=short, y=conv2, act="relu")
        return y


class TSM_ResNet(fluid.dygraph.Layer):
    """TSM network built from BottleneckBlocks; only 50 layers is supported.

    Args:
        name_scope: name scope passed to the base Layer.
        config: parsed config; ``config.MODEL`` supplies ``num_layers``,
            ``seg_num``, ``num_classes`` and ``seglen``, and
            ``config[mode.upper()]`` supplies ``target_size``.
        mode: config section name (case-insensitive), e.g. ``'Train'``.

    Raises:
        NotImplementedError: if ``config.MODEL.num_layers`` is not 50.
    """

    def __init__(self, name_scope, config, mode):
        super(TSM_ResNet, self).__init__(name_scope)

        self.layers = config.MODEL.num_layers
        self.seg_num = config.MODEL.seg_num
        self.class_dim = config.MODEL.num_classes
        # Per-frame shape (channels, height, width) used to reshape the input.
        self.reshape_list = [
            config.MODEL.seglen * 3, config[mode.upper()]['target_size'],
            config[mode.upper()]['target_size']
        ]

        if self.layers == 50:
            depth = [3, 4, 6, 3]
        else:
            raise NotImplementedError(
                "only num_layers == 50 is supported, got {}".format(
                    self.layers))
        num_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(
            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        self.bottleneck_block_list = []
        num_channels = 64

        for block in range(len(depth)):
            # The first block of every stage needs a projection branch.
            shortcut = False
            for i in range(depth[block]):
                bottleneck_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i),
                    BottleneckBlock(
                        num_channels=num_channels,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        shortcut=shortcut,
                        seg_num=self.seg_num))
                num_channels = int(bottleneck_block._num_channels_out)
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = True
        self.pool2d_avg = Pool2D(
            pool_size=7, pool_type='avg', global_pooling=True)

        # Uniform init bound for the classifier weights; ``math`` is already
        # imported at module level.
        stdv = 1.0 / math.sqrt(2048 * 1.0)

        self.out = Linear(
            2048,
            self.class_dim,
            act="softmax",
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv)),
            bias_attr=fluid.param_attr.ParamAttr(
                learning_rate=2.0, regularizer=fluid.regularizer.L2Decay(0.)))

    # NOTE: the body of forward is rewritten by the dygraph-to-static
    # translator, so its statements are intentionally left unchanged.
    @declarative
    def forward(self, inputs):
        y = fluid.layers.reshape(inputs, [-1] + self.reshape_list)
        y = self.conv(y)
        y = self.pool2d_max(y)
        for bottleneck_block in self.bottleneck_block_list:
            y = bottleneck_block(y)
        y = self.pool2d_avg(y)
        y = fluid.layers.dropout(y, dropout_prob=0.5)
        y = fluid.layers.reshape(y, [-1, self.seg_num, y.shape[1]])
        y = fluid.layers.reduce_mean(y, dim=1)
        y = fluid.layers.reshape(y, shape=[-1, 2048])
        y = self.out(y)
        return y


class FakeDataReader(object):
    """Pre-generates a fixed number of random (images, label) batches.

    All batches are generated once in ``__init__`` so that every reader
    created from the same instance yields identical data.

    Args:
        mode: config section name (case-insensitive) providing
            ``target_size`` and ``batch_size``.
        cfg: parsed config with a ``MODEL`` section.
    """

    def __init__(self, mode, cfg):
        self.format = cfg.MODEL.format
        self.num_classes = cfg.MODEL.num_classes
        self.seg_num = cfg.MODEL.seg_num
        self.seglen = cfg.MODEL.seglen

        self.target_size = cfg[mode.upper()]['target_size']
        self.img_mean = np.array(cfg.MODEL.image_mean).reshape(
            [3, 1, 1]).astype(np.float32)
        self.img_std = np.array(cfg.MODEL.image_std).reshape(
            [3, 1, 1]).astype(np.float32)

        self.batch_size = cfg[mode.upper()]['batch_size']
        self.generator_out = []
        self.total_iter = 3
        # The order of RNG calls (label first, then images) is kept as is,
        # because it determines the generated data.
        for _ in range(self.total_iter):
            batch_out = []
            for _ in range(self.batch_size):
                label = np.int64(random.randint(0, self.num_classes - 1))
                # Only the first channel's mean/std is used for sampling.
                random_mean = self.img_mean[0][0][0]
                random_std = self.img_std[0][0][0]
                imgs = np.random.normal(random_mean, random_std, [
                    self.seg_num, self.seglen * 3, self.target_size,
                    self.target_size
                ]).astype(np.float32)
                batch_out.append((imgs, label))
            self.generator_out.append(batch_out)

    def create_reader(self):
        """Return a callable that yields the pre-generated batches in order."""

        def batch_reader():
            for i in range(self.total_iter):
                yield self.generator_out[i]

        return batch_reader


def create_optimizer(cfg, params):
    """Create a Momentum optimizer with piecewise learning-rate decay.

    Args:
        cfg: training config section providing ``total_videos``,
            ``batch_size``, ``decay_epochs``, ``learning_rate``,
            ``learning_rate_decay``, ``l2_weight_decay`` and ``momentum``.
        params: parameter list handed to the optimizer.

    Returns:
        The configured ``fluid.optimizer.Momentum`` instance.
    """
    total_videos = cfg.total_videos
    # Iterations per epoch; decay boundaries are expressed in iterations.
    step = int(total_videos / cfg.batch_size + 1)
    bd = [e * step for e in cfg.decay_epochs]
    base_lr = cfg.learning_rate
    lr_decay = cfg.learning_rate_decay
    lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay]
    l2_weight_decay = cfg.l2_weight_decay
    momentum = cfg.momentum

    optimizer = fluid.optimizer.Momentum(
        learning_rate=fluid.layers.piecewise_decay(
            boundaries=bd, values=lr),
        momentum=momentum,
        regularization=fluid.regularizer.L2Decay(l2_weight_decay),
        parameter_list=params)

    return optimizer


def train(args, fake_data_reader, to_static):
    """Train the TSM model on fake data and record per-iteration metrics.

    Args:
        args: namespace with ``config`` (path to the YAML file) and
            ``use_gpu``.
        fake_data_reader: object whose ``create_reader()`` returns a batch
            generator function.
        to_static: value passed to ``ProgramTranslator().enable``.

    Returns:
        Flat list ``[loss, acc1, acc5, loss, acc1, acc5, ...]`` with one
        triple per training iteration.
    """
    program_translator = ProgramTranslator()
    program_translator.enable(to_static)

    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    # merge_configs updates ``config`` in place; the returned object was
    # never used, so it is no longer bound to a local name.
    merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()

    # Re-seed so both runs (static and dygraph) start from the same state.
    random.seed(0)
    np.random.seed(0)
    with fluid.dygraph.guard(place):
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000

        video_model = TSM_ResNet("TSM", train_config, 'Train')

        optimizer = create_optimizer(train_config.TRAIN,
                                     video_model.parameters())

        train_reader = fake_data_reader.create_reader()

        ret = []
        for epoch in range(train_config.TRAIN.epoch):
            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([item[0] for item in data])
                y_data = np.array([item[1] for item in data]).reshape([-1, 1])

                imgs = to_variable(x_data)
                labels = to_variable(y_data)
                labels.stop_gradient = True
                outputs = video_model(imgs)
                loss = fluid.layers.cross_entropy(
                    input=outputs, label=labels, ignore_index=-1)
                avg_loss = fluid.layers.mean(loss)
                acc_top1 = fluid.layers.accuracy(
                    input=outputs, label=labels, k=1)
                acc_top5 = fluid.layers.accuracy(
                    input=outputs, label=labels, k=5)

                avg_loss.backward()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                # Convert each metric once and reuse the scalar below.
                loss_value = avg_loss.numpy()[0]
                acc1_value = acc_top1.numpy()[0]
                acc5_value = acc_top5.numpy()[0]

                total_loss += loss_value
                total_acc1 += acc1_value
                total_acc5 += acc5_value
                total_sample += 1

                print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.
                      format(epoch, batch_id, loss_value, acc1_value,
                             acc5_value))
                ret.extend([loss_value, acc1_value, acc5_value])

            print(
                'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'.
                format(epoch, total_loss / total_sample, total_acc1 /
                       total_sample, total_acc5 / total_sample))
        return ret


class TestTsm(unittest.TestCase):
    """Compares training metrics with the program translator off and on."""

    def test_dygraph_static_same_loss(self):
        if fluid.is_compiled_with_cuda():
            fluid.set_flags({"FLAGS_cudnn_deterministic": True})
        args = parse_args()
        # The same reader instance feeds both runs, so they see identical data.
        fake_data_reader = FakeDataReader("train", parse_config(args.config))
        dygraph_loss = train(args, fake_data_reader, to_static=False)
        static_loss = train(args, fake_data_reader, to_static=True)
        self.assertTrue(
            np.allclose(dygraph_loss, static_loss),
            msg="dygraph_loss: {} \nstatic_loss: {}".format(dygraph_loss,
                                                            static_loss))


if __name__ == '__main__':
    unittest.main()
l2_weight_decay: 1e-4 + momentum: 0.9 + total_videos: 8000 #239781 + +VALID: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 32 #128 + filelist: "./data/dataset/kinetics/val.list" + +TEST: + short_size: 256 + target_size: 224 + num_reader_threads: 12 + buf_size: 1024 + batch_size: 64 + filelist: "./data/dataset/kinetics/test.list" diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4fedd1b246b27f6e3ddfd8d12dfcec51e7737e5b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/tsm_config_utils.py @@ -0,0 +1,85 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
"""Helpers for loading the TSM YAML config into attribute-accessible dicts.

PyYAML is imported inside ``parse_config`` (the only function that needs it),
so the remaining helpers can be imported without it.
"""

import logging

logger = logging.getLogger(__name__)

# Lower-case section names accepted by merge_configs; the matching top-level
# config key is the upper-cased name (e.g. 'train' -> 'TRAIN').
CONFIG_SECS = [
    'train',
    'valid',
    'test',
    'infer',
]


class AttrDict(dict):
    """A dict whose keys can also be read and written as attributes."""

    def __getattr__(self, key):
        # Only reached when normal attribute lookup fails. A missing key must
        # surface as AttributeError (not KeyError), otherwise hasattr() and
        # getattr(obj, name, default) raise instead of reporting absence.
        try:
            return self[key]
        except KeyError:
            raise AttributeError(key)

    def __setattr__(self, key, value):
        # Real instance attributes are updated in place; everything else is
        # stored as a dict item.
        if key in self.__dict__:
            self.__dict__[key] = value
        else:
            self[key] = value


def parse_config(cfg_file):
    """Load a YAML config file into a nested AttrDict.

    Args:
        cfg_file: path of the YAML file to read.

    Returns:
        AttrDict with nested dicts converted by ``create_attr_dict``. An
        empty file yields an empty AttrDict.
    """
    import yaml
    with open(cfg_file, 'r') as fopen:
        # NOTE(review): yaml.Loader can construct arbitrary Python objects.
        # That is acceptable for config files shipped with the tests, but
        # switch to yaml.SafeLoader before loading untrusted files.
        loaded = yaml.load(fopen, Loader=yaml.Loader)
    # yaml.load returns None for an empty document.
    yaml_config = AttrDict(loaded or {})
    create_attr_dict(yaml_config)
    return yaml_config


def create_attr_dict(yaml_config):
    """Recursively convert nested dicts to AttrDict, in place.

    String values are additionally passed through ``ast.literal_eval`` so
    that e.g. ``'1e-4'`` becomes a float; strings that are not Python
    literals are kept unchanged.

    Args:
        yaml_config: dict-like object to convert; it is modified in place.
    """
    from ast import literal_eval
    # Iterate over a snapshot because values are reassigned while looping.
    for key, value in list(yaml_config.items()):
        if isinstance(value, dict) and not isinstance(value, AttrDict):
            value = AttrDict(value)
        if isinstance(value, str):
            try:
                value = literal_eval(value)
            except Exception:
                # Not a Python literal (e.g. a path or a name): keep the raw
                # string. KeyboardInterrupt/SystemExit are no longer swallowed.
                pass
        yaml_config[key] = value
        if isinstance(value, AttrDict):
            create_attr_dict(value)
    return


def merge_configs(cfg, sec, args_dict):
    """Override existing keys of one config section with argument values.

    Args:
        cfg: config returned by ``parse_config``.
        sec: section name, one of ``CONFIG_SECS``.
        args_dict: mapping of option name to value; entries whose value is
            None, or whose name is not already a key of the section, are
            ignored.

    Returns:
        ``cfg`` itself, updated in place.
    """
    assert sec in CONFIG_SECS, "invalid config section {}".format(sec)
    sec_dict = getattr(cfg, sec.upper())
    for k, v in args_dict.items():
        if v is None:
            continue
        # A membership test replaces hasattr() wrapped in a bare except: it
        # does not depend on which exception attribute lookup raises, and it
        # does not treat dict method names such as 'items' as config keys.
        if k in sec_dict:
            setattr(sec_dict, k, v)
    return cfg


def print_configs(cfg, mode):
    """Log every section of ``cfg`` and its items at INFO level.

    Args:
        cfg: mapping of section name to section contents.
        mode: label shown in the header line.
    """
    logger.info("---------------- {:>5} Arguments ----------------".format(
        mode))
    for sec, sec_items in cfg.items():
        logger.info("{}:".format(sec))
        if not isinstance(sec_items, dict):
            # Top-level scalar entry: there are no nested items to iterate.
            logger.info("    {}".format(sec_items))
            continue
        for k, v in sec_items.items():
            logger.info("    {}:{}".format(k, v))
    logger.info("-------------------------------------------------")