From 9ec4ae1e9ec14fc40e6f4eabd810ed1b28887363 Mon Sep 17 00:00:00 2001
From: shippingwang
Date: Thu, 24 Sep 2020 11:27:39 +0000
Subject: [PATCH] upgrade to API2.0

---
 dygraph/tsn/model.py | 165 +++++++++++++--------
 dygraph/tsn/train.py | 266 +++++++++++++++++++++----------------
 2 files changed, 211 insertions(+), 220 deletions(-)

diff --git a/dygraph/tsn/model.py b/dygraph/tsn/model.py
index ecd65ac1..5a7d4b01 100644
--- a/dygraph/tsn/model.py
+++ b/dygraph/tsn/model.py
@@ -18,91 +18,92 @@ from __future__ import print_function
 
 import numpy as np
 import paddle
-import paddle.fluid as fluid
-from paddle.fluid.param_attr import ParamAttr
-from paddle.fluid.layer_helper import LayerHelper
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout
-
 import math
+from paddle.nn import Conv2d, BatchNorm2d, Linear, Dropout, MaxPool2d, AvgPool2d
+from paddle import ParamAttr
+import paddle.nn.functional as F
+from paddle.jit import to_static
+from paddle.static import InputSpec
 
-
-class ConvBNLayer(fluid.dygraph.Layer):
+class ConvBNLayer(paddle.nn.Layer):
     def __init__(self,
-                 num_channels,
-                 num_filters,
-                 filter_size,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
                  stride=1,
                  groups=1,
                  act=None,
                  name=None):
         super(ConvBNLayer, self).__init__()
 
-        self._conv = Conv2D(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=filter_size,
+        self._conv = Conv2d(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
             stride=stride,
-            padding=(filter_size - 1) // 2,
+            padding=(kernel_size - 1) // 2,
             groups=groups,
-            act=None,
-            param_attr=ParamAttr(name=name + "_weights"),
+            weight_attr=ParamAttr(name=name + "_weights"),
             bias_attr=False)
         if name == "conv1":
             bn_name = "bn_" + name
         else:
             bn_name = "bn" + name[3:]
-        self._batch_norm = BatchNorm(
-            num_filters,
-            act=act,
-            param_attr=ParamAttr(name=bn_name + "_scale"),
-            bias_attr=ParamAttr(bn_name + "_offset"),
-            moving_mean_name=bn_name + "_mean",
-            moving_variance_name=bn_name + "_variance")
+
+        self._act = act
+
+        self._batch_norm = BatchNorm2d(
+            out_channels,
+            weight_attr=ParamAttr(name=bn_name + "_scale"),
+            bias_attr=ParamAttr(bn_name + "_offset"))
 
     def forward(self, inputs):
         y = self._conv(inputs)
+        y = self._batch_norm(y)
+        if self._act:
+            y = getattr(paddle.nn.functional, self._act)(y)
         return y
 
 
-class BottleneckBlock(fluid.dygraph.Layer):
+class BottleneckBlock(paddle.nn.Layer):
     def __init__(self,
-                 num_channels,
-                 num_filters,
+                 in_channels,
+                 out_channels,
                  stride,
                  shortcut=True,
                  name=None):
         super(BottleneckBlock, self).__init__()
 
         self.conv0 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=1,
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
             act="relu",
             name=name + "_branch2a")
         self.conv1 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
             stride=stride,
             act="relu",
             name=name + "_branch2b")
+
         self.conv2 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters * 4,
-            filter_size=1,
+            in_channels=out_channels,
+            out_channels=out_channels * 4,
+            kernel_size=1,
             act=None,
             name=name + "_branch2c")
 
         if not shortcut:
             self.short = ConvBNLayer(
-                num_channels=num_channels,
-                num_filters=num_filters * 4,
-                filter_size=1,
+                in_channels=in_channels,
+                out_channels=out_channels * 4,
+                kernel_size=1,
                 stride=stride,
                 name=name + "_branch1")
 
         self.shortcut = shortcut
-        self._num_channels_out = num_filters * 4
 
     def forward(self, inputs):
         y = self.conv0(inputs)
@@ -114,37 +115,37 @@ class BottleneckBlock(fluid.dygraph.Layer):
         else:
             short = self.short(inputs)
 
-        y = fluid.layers.elementwise_add(x=short, y=conv2)
-        return fluid.layers.relu(y)
+        y = paddle.add(x=short, y=conv2)
+        return F.relu(y)
 
 
-class BasicBlock(fluid.dygraph.Layer):
+class BasicBlock(paddle.nn.Layer):
     def __init__(self,
-                 num_channels,
-                 num_filters,
+                 in_channels,
+                 out_channels,
                  stride,
                  shortcut=True,
                  name=None):
         super(BasicBlock, self).__init__()
         self.stride = stride
 
         self.conv0 = ConvBNLayer(
-            num_channels=num_channels,
-            num_filters=num_filters,
-            filter_size=3,
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
             stride=stride,
             act="relu",
             name=name + "_branch2a")
         self.conv1 = ConvBNLayer(
-            num_channels=num_filters,
-            num_filters=num_filters,
-            filter_size=3,
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
             act=None,
             name=name + "_branch2b")
 
         if not shortcut:
             self.short = ConvBNLayer(
-                num_channels=num_channels,
-                num_filters=num_filters,
-                filter_size=1,
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
                 stride=stride,
                 name=name + "_branch1")
@@ -159,13 +160,11 @@ class BasicBlock(fluid.dygraph.Layer):
             short = inputs
         else:
             short = self.short(inputs)
-        y = fluid.layers.elementwise_add(x=short, y=conv1)
-
-        layer_helper = LayerHelper(self.full_name(), act="relu")
-        return layer_helper.append_activation(y)
+        y = paddle.add(short, conv1)
+        y = F.relu(y)
+        return y
 
-
-class TSN_ResNet(fluid.dygraph.Layer):
+class TSN_ResNet(paddle.nn.Layer):
     def __init__(self, config):
         super(TSN_ResNet, self).__init__()
         self.layers = config.MODEL.num_layers
@@ -184,19 +183,19 @@ class TSN_ResNet(fluid.dygraph.Layer):
             depth = [3, 4, 23, 3]
         elif self.layers == 152:
             depth = [3, 8, 36, 3]
-        num_channels = [64, 256, 512,
+        in_channels = [64, 256, 512,
                         1024] if self.layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512]
+        out_channels = [64, 128, 256, 512]
 
         self.conv = ConvBNLayer(
-            num_channels=3,
-            num_filters=64,
-            filter_size=7,
+            in_channels=3,
+            out_channels=64,
+            kernel_size=7,
             stride=2,
             act="relu",
             name="conv1")
-        self.pool2d_max = Pool2D(
-            pool_size=3, pool_stride=2, pool_padding=1, pool_type="max")
+        self.pool2d_max = MaxPool2d(
+            kernel_size=3, stride=2, padding=1)
 
         self.block_list = []
         if self.layers >= 50:
@@ -213,9 +212,9 @@
                     bottleneck_block = self.add_sublayer(
                         conv_name,
                         BottleneckBlock(
-                            num_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            num_filters=num_filters[block],
+                            in_channels=in_channels[block]
+                            if i == 0 else out_channels[block] * 4,
+                            out_channels=out_channels[block],
                             stride=2 if i == 0 and block != 0 else 1,
                             shortcut=shortcut,
                             name=conv_name))
@@ -229,44 +228,44 @@
                     basic_block = self.add_sublayer(
                         conv_name,
                         BasicBlock(
-                            num_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            num_filters=num_filters[block],
+                            in_channels=in_channels[block]
+                            if i == 0 else out_channels[block],
+                            out_channels=out_channels[block],
                             stride=2 if i == 0 and block != 0 else 1,
                             shortcut=shortcut,
                             name=conv_name))
                     self.block_list.append(basic_block)
                     shortcut = True
 
-        self.pool2d_avg = Pool2D(
-            pool_size=7, pool_type='avg', global_pooling=True)
+        self.pool2d_avg = AvgPool2d(kernel_size=7)
 
-        self.pool2d_avg_channels = num_channels[-1] * 2
+        self.pool2d_avg_channels = in_channels[-1] * 2
 
         self.out = Linear(
             self.pool2d_avg_channels,
             self.class_dim,
-            act='softmax',
-            param_attr=ParamAttr(
-                initializer=fluid.initializer.Normal(
+            weight_attr=ParamAttr(
+                initializer=paddle.nn.initializer.Normal(
                     loc=0.0, scale=0.01),
                 name="fc_0.w_0"),
             bias_attr=ParamAttr(
-                initializer=fluid.initializer.ConstantInitializer(value=0.0),
+                initializer=paddle.nn.initializer.Constant(value=0.0),
                 name="fc_0.b_0"))
 
+    #@to_static(input_spec=[InputSpec(shape=[None, 3, 224, 224], name='inputs')])
+
     def forward(self, inputs):
-        y = fluid.layers.reshape(
+        y = paddle.reshape(
             inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])
         y = self.conv(y)
         y = self.pool2d_max(y)
         for block in self.block_list:
             y = block(y)
         y = self.pool2d_avg(y)
-        y = fluid.layers.dropout(
-            y, dropout_prob=0.2, dropout_implementation="upscale_in_train")
-        y = fluid.layers.reshape(y, [-1, self.seg_num, y.shape[1]])
-        y = fluid.layers.reduce_mean(y, dim=1)
-        y = fluid.layers.reshape(y, shape=[-1, 2048])
+        y = F.dropout(y, p=0.2, training=self.training)
+        y = paddle.reshape(y, [-1, self.seg_num, y.shape[1]])
+        y = paddle.mean(y, axis=1)
+        y = paddle.reshape(y, shape=[-1, 2048])
         y = self.out(y)
+        y = F.softmax(y)
         return y
diff --git a/dygraph/tsn/train.py b/dygraph/tsn/train.py
index 1cfcf27b..993a16f9 100644
--- a/dygraph/tsn/train.py
+++ b/dygraph/tsn/train.py
@@ -16,20 +16,19 @@ import os
 import sys
 import time
 import argparse
-import ast
 import wget
 import tarfile
 import logging
 import numpy as np
-import paddle.fluid as fluid
 import glob
-from paddle.fluid.dygraph.base import to_variable
+import ast
 
 from model import TSN_ResNet
 from utils.config_utils import *
 from reader.ucf101_reader import UCF101Reader
 import paddle
 from paddle.io import DataLoader, DistributedBatchSampler
 from compose import TSN_UCF101_Dataset
+import paddle.nn.functional as F
 
 logging.root.handlers = []
 FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
@@ -127,11 +126,11 @@ def val(epoch, model, val_loader, cfg, args):
 
         outputs = model(imgs)
 
-        loss = fluid.layers.cross_entropy(
+        loss = F.cross_entropy(
             input=outputs, label=labels, ignore_index=-1)
-        avg_loss = fluid.layers.mean(loss)
-        acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1)
-        acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5)
+        avg_loss = paddle.mean(loss)
+        acc_top1 = paddle.metric.accuracy(input=outputs, label=labels, k=1)
+        acc_top5 = paddle.metric.accuracy(input=outputs, label=labels, k=5)
 
         dy_out = avg_loss.numpy()[0]
         total_loss += dy_out
@@ -161,12 +160,12 @@ def create_optimizer(cfg, params):
     l2_weight_decay = cfg.l2_weight_decay
     momentum = cfg.momentum
 
-    optimizer = fluid.optimizer.Momentum(
-        learning_rate=fluid.layers.piecewise_decay(
+    optimizer = paddle.optimizer.Momentum(
+        learning_rate=paddle.optimizer.PiecewiseLR(
             boundaries=bd, values=lr),
         momentum=momentum,
-        regularization=fluid.regularizer.L2Decay(l2_weight_decay),
-        parameter_list=params)
+        weight_decay=paddle.regularizer.L2Decay(l2_weight_decay),
+        parameters=params)
 
     return optimizer
@@ -178,162 +177,155 @@ def train(args):
     print_configs(train_config, 'Train')
 
     use_data_parallel = args.use_data_parallel
-    trainer_count = fluid.dygraph.parallel.Env().nranks
 
-    # (data_parallel step1/6)
-    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
-        if use_data_parallel else fluid.CUDAPlace(0)
-    pre_state_dict = fluid.load_program_state(args.pretrain)
+    place = paddle.CUDAPlace(paddle.distributed.ParallelEnv().dev_id) \
+        if use_data_parallel else paddle.CUDAPlace(0)
+    paddle.disable_static(place)
 
-    with fluid.dygraph.guard(place):
-        if use_data_parallel:
-            # (data_parallel step2/6)
-            strategy = fluid.dygraph.parallel.prepare_context()
-            video_model = TSN_ResNet(train_config)
-            video_model = init_model(video_model, pre_state_dict)
-            optimizer = create_optimizer(train_config.TRAIN,
-                                         video_model.parameters())
-
-        if use_data_parallel:
-            # (data_parallel step3/6)
-            video_model = fluid.dygraph.parallel.DataParallel(video_model,
-                                                              strategy)
+    if use_data_parallel:
+        paddle.distributed.init_parallel_env()
+
+    video_model = TSN_ResNet(train_config)
+    if use_data_parallel:
+        video_model = paddle.DataParallel(video_model)
+
+    pre_state_dict, _ = paddle.load(args.pretrain)
+    #if paddle.distributed.ParallelEnv().local_rank == 0:
+    video_model = init_model(video_model, pre_state_dict)
+
+    optimizer = create_optimizer(train_config.TRAIN,
+                                 video_model.parameters())
 
-        bs_denominator = 1
-        if args.use_gpu:
+    bs_denominator = 1
+    if args.use_gpu:
         # check number of GPUs
-            gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
-            if gpus == "":
-                pass
-            else:
-                gpus = gpus.split(",")
-                num_gpus = len(gpus)
-                bs_denominator = num_gpus
-        bs_train_single = int(train_config.TRAIN.batch_size / bs_denominator)
-        bs_val_single = int(valid_config.VALID.batch_size / bs_denominator)
-
-        train_dataset = TSN_UCF101_Dataset(train_config, 'train')
-        val_dataset = TSN_UCF101_Dataset(valid_config, 'valid')
-        train_sampler = DistributedBatchSampler(
+        gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
+        if gpus == "":
+            pass
+        else:
+            gpus = gpus.split(",")
+            num_gpus = len(gpus)
+            bs_denominator = num_gpus
+    bs_train_single = int(train_config.TRAIN.batch_size / bs_denominator)
+    bs_val_single = int(valid_config.VALID.batch_size / bs_denominator)
+
+    train_dataset = TSN_UCF101_Dataset(train_config, 'train')
+    val_dataset = TSN_UCF101_Dataset(valid_config, 'valid')
+    train_sampler = DistributedBatchSampler(
         train_dataset,
         batch_size=bs_train_single,
         shuffle=train_config.TRAIN.use_shuffle,
         drop_last=True)
-        train_loader = DataLoader(
+    train_loader = DataLoader(
         train_dataset,
         batch_sampler=train_sampler,
         places=place,
         num_workers=train_config.TRAIN.num_workers,
         return_list=True)
-        val_sampler = DistributedBatchSampler(
+    val_sampler = DistributedBatchSampler(
         val_dataset, batch_size=bs_val_single)
-        val_loader = DataLoader(
+    val_loader = DataLoader(
         val_dataset,
         batch_sampler=val_sampler,
         places=place,
         num_workers=valid_config.VALID.num_workers,
         return_list=True)
 
-        if use_data_parallel:
-            # (data_parallel step4/6)
-            train_reader = fluid.contrib.reader.distributed_batch_reader(
-                train_reader)
-
-        # resume training the model
-        if args.resume is not None:
-            model_state, opt_state = fluid.load_dygraph(args.resume)
-            video_model.set_dict(model_state)
-            optimizer.set_dict(opt_state)
+    # resume training the model
+    if args.resume is not None:
+        model_state, opt_state = paddle.load(args.resume)
+        video_model.set_dict(model_state)
+        optimizer.set_dict(opt_state)
 
-        for epoch in range(1, train_config.TRAIN.epoch + 1):
-            video_model.train()
-            total_loss = 0.0
-            total_acc1 = 0.0
-            total_acc5 = 0.0
-            total_sample = 0
-            batch_start = time.time()
-            for batch_id, data in enumerate(train_loader):
-                train_reader_cost = time.time() - batch_start
-                imgs = paddle.to_tensor(data[0])
-                labels = paddle.to_tensor(data[1])
-                labels.stop_gradient = True
-                outputs = video_model(imgs)
-
-                loss = fluid.layers.cross_entropy(
-                    input=outputs, label=labels, ignore_index=-1)
-                avg_loss = fluid.layers.mean(loss)
-
-                acc_top1 = fluid.layers.accuracy(
-                    input=outputs, label=labels, k=1)
-                acc_top5 = fluid.layers.accuracy(
-                    input=outputs, label=labels, k=5)
-
-                dy_out = avg_loss.numpy()[0]
-
-                if use_data_parallel:
-                    # (data_parallel step5/6)
-                    avg_loss = video_model.scale_loss(avg_loss)
-                    avg_loss.backward()
-                    video_model.apply_collective_grads()
-                else:
-                    avg_loss.backward()
-
-                optimizer.minimize(avg_loss)
-                video_model.clear_gradients()
-
-                total_loss += dy_out
-                total_acc1 += acc_top1.numpy()[0]
-                total_acc5 += acc_top5.numpy()[0]
-                total_sample += 1
-                train_batch_cost = time.time() - batch_start
-                print(
-                    'TRAIN Epoch: {}, iter: {}, batch_cost: {:.5f} s, reader_cost: {:.5f} s, loss={:.6f}, acc1 {:.6f}, acc5 {:.6f} '.
-                    format(epoch, batch_id, train_batch_cost, train_reader_cost,
-                           total_loss / total_sample, total_acc1 / total_sample,
-                           total_acc5 / total_sample))
-                batch_start = time.time()
-            print(
-                'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'.
-                format(epoch, total_loss / total_sample, total_acc1 /
-                       total_sample, total_acc5 / total_sample))
-
-            # save model's and optimizer's parameters which used for resuming the training stage
-            save_parameters = (not use_data_parallel) or (
-                use_data_parallel and
-                fluid.dygraph.parallel.Env().local_rank == 0)
-            if save_parameters:
-                model_path_pre = "_tsn"
-                if not os.path.isdir(args.checkpoint):
-                    os.makedirs(args.checkpoint)
-                model_path = os.path.join(
-                    args.checkpoint,
-                    "_" + model_path_pre + "_epoch{}".format(epoch))
-                fluid.dygraph.save_dygraph(video_model.state_dict(), model_path)
-                fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)
-
-            if args.validate:
-                video_model.eval()
-                val_acc = val(epoch, video_model, val_loader, valid_config,
-                              args)
-                # save the best parameters in trainging stage
-                if epoch == 1:
-                    best_acc = val_acc
-                else:
-                    if val_acc > best_acc:
-                        best_acc = val_acc
-                        if fluid.dygraph.parallel.Env().local_rank == 0:
-                            if not os.path.isdir(args.weights):
-                                os.makedirs(args.weights)
-                            fluid.dygraph.save_dygraph(video_model.state_dict(),
-                                                       args.weights + "/final")
-            else:
-                if fluid.dygraph.parallel.Env().local_rank == 0:
-                    if not os.path.isdir(args.weights):
-                        os.makedirs(args.weights)
-                    fluid.dygraph.save_dygraph(video_model.state_dict(),
-                                               args.weights + "/final")
+    for epoch in range(1, train_config.TRAIN.epoch + 1):
+        video_model.train()
+        total_loss = 0.0
+        total_acc1 = 0.0
+        total_acc5 = 0.0
+        total_sample = 0
+        batch_start = time.time()
+        for batch_id, data in enumerate(train_loader):
+            train_reader_cost = time.time() - batch_start
+            imgs = paddle.to_tensor(data[0], place=paddle.CUDAPinnedPlace())
+            labels = paddle.to_tensor(data[1], place=paddle.CUDAPinnedPlace())
+            labels.stop_gradient = True
+            outputs = video_model(imgs)
+
+            loss = F.cross_entropy(
+                input=outputs, label=labels, ignore_index=-1)
+            avg_loss = paddle.mean(loss)
+
+            acc_top1 = paddle.metric.accuracy(
+                input=outputs, label=labels, k=1)
+            acc_top5 = paddle.metric.accuracy(
+                input=outputs, label=labels, k=5)
+
+            dy_out = avg_loss.numpy()[0]
+
+            if use_data_parallel:
+                # (data_parallel step5/6)
+                avg_loss = video_model.scale_loss(avg_loss)
+                avg_loss.backward()
+                video_model.apply_collective_grads()
+            else:
+                avg_loss.backward()
+            optimizer.step()
+            optimizer.clear_grad()
+
+            total_loss += dy_out
+            total_acc1 += acc_top1.numpy()[0]
+            total_acc5 += acc_top5.numpy()[0]
+            total_sample += 1
+            train_batch_cost = time.time() - batch_start
+            print(
+                'TRAIN Epoch: {}, iter: {}, batch_cost: {:.5f} s, reader_cost: {:.5f} s, loss={:.6f}, acc1 {:.6f}, acc5 {:.6f} '.
+                format(epoch, batch_id, train_batch_cost, train_reader_cost,
+                       total_loss / total_sample, total_acc1 / total_sample,
+                       total_acc5 / total_sample))
+            batch_start = time.time()
+
+        print(
+            'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'.
+            format(epoch, total_loss / total_sample, total_acc1 /
+                   total_sample, total_acc5 / total_sample))
+
+        # save model's and optimizer's parameters which are used for resuming the training stage
+        save_parameters = (not use_data_parallel) or (
+            use_data_parallel and
+            paddle.distributed.ParallelEnv().local_rank == 0)
+        if save_parameters:
+            model_path_pre = "_tsn"
+            if not os.path.isdir(args.checkpoint):
+                os.makedirs(args.checkpoint)
+            model_path = os.path.join(
+                args.checkpoint,
+                "_" + model_path_pre + "_epoch{}".format(epoch))
+            paddle.save(video_model.state_dict(), model_path)
+            paddle.save(optimizer.state_dict(), model_path)
+
+        if args.validate:
+            video_model.eval()
+            val_acc = val(epoch, video_model, val_loader, valid_config, args)
+            # save the best parameters in the training stage
+            if epoch == 1:
+                best_acc = val_acc
+            else:
+                if val_acc > best_acc:
+                    best_acc = val_acc
+                    if paddle.distributed.ParallelEnv().local_rank == 0:
+                        if not os.path.isdir(args.weights):
+                            os.makedirs(args.weights)
+                        paddle.save(video_model.state_dict(),
+                                    args.weights + "/final")
+        else:
+            if paddle.distributed.ParallelEnv().local_rank == 0:
+                if not os.path.isdir(args.weights):
+                    os.makedirs(args.weights)
+                paddle.save(video_model.state_dict(),
+                            args.weights + "/final")
 
-        logger.info('[TRAIN] training finished')
+    logger.info('[TRAIN] training finished')
 
 
 if __name__ == "__main__":
-- 
GitLab
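
Migration note (reviewer addition, not part of the commit): the core train.py change is the move from fluid's `optimizer.minimize(...)` plus `model.clear_gradients()` to the Paddle 2.x dygraph step idiom. Below is a minimal sketch of that idiom under an assumed current 2.x release; `SimpleNet`, the random batch, and the hyper-parameters are hypothetical stand-ins, not anything from the TSN code.

    # Minimal sketch of the Paddle 2.x dygraph training step:
    # backward() -> step() -> clear_grad(), with no optimizer.minimize() call.
    # SimpleNet and the random batch are hypothetical stand-ins for TSN_ResNet
    # and the UCF101 reader.
    import paddle
    import paddle.nn.functional as F

    class SimpleNet(paddle.nn.Layer):
        def __init__(self):
            super(SimpleNet, self).__init__()
            self.fc = paddle.nn.Linear(16, 4)

        def forward(self, x):
            # Return raw logits; F.cross_entropy applies softmax internally.
            return self.fc(x)

    model = SimpleNet()
    optimizer = paddle.optimizer.Momentum(
        learning_rate=0.01,
        momentum=0.9,
        weight_decay=paddle.regularizer.L2Decay(1e-4),
        parameters=model.parameters())

    for _ in range(3):
        x = paddle.randn([8, 16])                # fake batch of 8 samples
        label = paddle.randint(0, 4, [8, 1])     # fake int64 class labels

        loss = F.cross_entropy(input=model(x), label=label)
        loss.backward()         # accumulate gradients
        optimizer.step()        # apply the update exactly once per iteration
        optimizer.clear_grad()  # reset gradients for the next iteration

Under `paddle.DataParallel`, newer 2.x releases also make the `scale_loss()` / `apply_collective_grads()` pair retained in this patch unnecessary: a plain `loss.backward()` already performs the cross-rank gradient synchronization.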