Commit 9ec4ae1e authored by S shippingwang

Upgrade to Paddle 2.0 API

Parent 4d1187d5
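
This commit replaces the legacy fluid dygraph API with its paddle 2.0 equivalents throughout the model and training script. A minimal sketch of the import mapping, using only names that appear in the hunks below:

    # before: fluid dygraph layers and attrs
    from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout
    from paddle.fluid.param_attr import ParamAttr

    # after: paddle 2.0 layers plus functional ops
    from paddle.nn import Conv2d, BatchNorm2d, Linear, Dropout, MaxPool2d, AvgPool2d
    from paddle import ParamAttr
    import paddle.nn.functional as F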
@@ -18,91 +18,92 @@ from __future__ import print_function
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear, Dropout
import math
from paddle.nn import Conv2d, BatchNorm2d, Linear, Dropout, MaxPool2d, AvgPool2d
from paddle import ParamAttr
import paddle.nn.functional as F
from paddle.jit import to_static
from paddle.static import InputSpec
class ConvBNLayer(fluid.dygraph.Layer):
class ConvBNLayer(paddle.nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
self._conv = Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(filter_size - 1) // 2,
padding=(kernel_size - 1) // 2,
groups=groups,
act=None,
param_attr=ParamAttr(name=name + "_weights"),
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(bn_name + "_offset"),
moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + "_variance")
self._act = act
self._batch_norm = BatchNorm2d(
out_channels,
weight_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(bn_name + "_offset"))
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
if self._act:
y = getattr(paddle.nn.functional, self._act)(y)
return y
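
Conv2d and BatchNorm2d no longer accept an act argument, so ConvBNLayer stores the activation name and applies it functionally in forward. A small self-contained sketch of that pattern (the tensor shape is arbitrary, for illustration only):

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([1, 64, 56, 56])
    act = "relu"
    # getattr(F, "relu") resolves to paddle.nn.functional.relu
    y = getattr(F, act)(x) if act else x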
class BottleneckBlock(fluid.dygraph.Layer):
class BottleneckBlock(paddle.nn.Layer):
def __init__(self,
num_channels,
num_filters,
in_channels,
out_channels,
stride,
shortcut=True,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act="relu",
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act="relu",
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
in_channels=in_channels,
out_channels=out_channels * 4,
kernel_size=1,
stride=stride,
name=name + "_branch1")
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
@@ -114,37 +115,37 @@ class BottleneckBlock(fluid.dygraph.Layer):
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv2)
return fluid.layers.relu(y)
y = paddle.add(x=short, y=conv2)
return F.relu(y)
class BasicBlock(fluid.dygraph.Layer):
class BasicBlock(paddle.nn.Layer):
def __init__(self,
num_channels,
num_filters,
in_channels,
out_channels,
stride,
shortcut=True,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
in_channels=in_channels,
out_channels=out_channels,
filter_size=3,
stride=stride,
act="relu",
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
in_channels=out_channels,
out_channels=out_channels,
filter_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
in_channels=in_channels,
out_channels=out_channels,
filter_size=1,
stride=stride,
name=name + "_branch1")
@@ -159,13 +160,11 @@ class BasicBlock(fluid.dygraph.Layer):
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv1)
layer_helper = LayerHelper(self.full_name(), act="relu")
return layer_helper.append_activation(y)
y = paddle.add(short, conv1)
y = F.relu(y)
return y
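
Both residual blocks now replace fluid.layers.elementwise_add and the LayerHelper activation trick with plain 2.0 ops. A minimal sketch, assuming two feature maps of matching shape:

    import paddle
    import paddle.nn.functional as F

    short = paddle.rand([1, 64, 28, 28])
    conv = paddle.rand([1, 64, 28, 28])
    # before: fluid.layers.elementwise_add + LayerHelper(...).append_activation
    y = F.relu(paddle.add(short, conv))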
class TSN_ResNet(fluid.dygraph.Layer):
class TSN_ResNet(paddle.nn.Layer):
def __init__(self, config):
super(TSN_ResNet, self).__init__()
self.layers = config.MODEL.num_layers
@@ -184,19 +183,19 @@ class TSN_ResNet(fluid.dygraph.Layer):
depth = [3, 4, 23, 3]
elif self.layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512,
in_channels = [64, 256, 512,
1024] if self.layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
out_channels = [64, 128, 256, 512]
self.conv = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
in_channels=3,
out_channels=64,
kernel_size=7,
stride=2,
act="relu",
name="conv1")
self.pool2d_max = Pool2D(
pool_size=3, pool_stride=2, pool_padding=1, pool_type="max")
self.pool2d_max = MaxPool2d(
kernel_size=3, stride=2, padding=1)
self.block_list = []
if self.layers >= 50:
@@ -213,9 +212,9 @@ class TSN_ResNet(fluid.dygraph.Layer):
bottleneck_block = self.add_sublayer(
conv_name,
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
in_channels=in_channels[block]
if i == 0 else out_channels[block] * 4,
out_channels=out_channels[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
name=conv_name))
@@ -229,44 +228,44 @@ class TSN_ResNet(fluid.dygraph.Layer):
basic_block = self.add_sublayer(
conv_name,
BasicBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block],
num_filters=num_filters[block],
in_channels=in_channels[block]
if i == 0 else out_channels[block],
out_channels=out_channels[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
name=conv_name))
self.block_list.append(basic_block)
shortcut = True
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
self.pool2d_avg = AvgPool2d(kernel_size=7)
self.pool2d_avg_channels = num_channels[-1] * 2
self.pool2d_avg_channels = in_channels[-1] * 2
self.out = Linear(
self.pool2d_avg_channels,
self.class_dim,
act='softmax',
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(
weight_attr=ParamAttr(
initializer=paddle.nn.initializer.Normal(
loc=0.0, scale=0.01),
name="fc_0.w_0"),
bias_attr=ParamAttr(
initializer=fluid.initializer.ConstantInitializer(value=0.0),
initializer=paddle.nn.initializer.Constant(value=0.0),
name="fc_0.b_0"))
#@to_static(input_spec=[InputSpec(shape=[None, 3, 224, 224], name='inputs')])
def forward(self, inputs):
y = fluid.layers.reshape(
y = paddle.reshape(
inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])
y = self.conv(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.pool2d_avg(y)
y = fluid.layers.dropout(
y, dropout_prob=0.2, dropout_implementation="upscale_in_train")
y = fluid.layers.reshape(y, [-1, self.seg_num, y.shape[1]])
y = fluid.layers.reduce_mean(y, dim=1)
y = fluid.layers.reshape(y, shape=[-1, 2048])
y = F.dropout(y, p=0.2)
y = paddle.reshape(y, [-1, self.seg_num, y.shape[1]])
y = paddle.mean(y, axis=1)
y = paddle.reshape(y, shape=[-1, 2048])
y = self.out(y)
y = F.softmax(y)
return y
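
The forward pass maps one-for-one onto 2.0 ops: fluid.layers.reshape -> paddle.reshape, reduce_mean(dim=...) -> paddle.mean(axis=...), and fluid.layers.dropout with dropout_implementation="upscale_in_train" -> F.dropout, whose default mode is already upscale_in_train; the softmax that used to live inside Linear(act='softmax') becomes an explicit F.softmax call. A small sketch of the segment-averaging head with illustrative sizes (seg_num=8, 2048 features):

    import paddle
    import paddle.nn.functional as F

    seg_num, feat = 8, 2048
    y = paddle.rand([2 * seg_num, feat])    # per-segment features
    y = F.dropout(y, p=0.2)                 # upscale_in_train by default
    y = paddle.reshape(y, [-1, seg_num, feat])
    y = paddle.mean(y, axis=1)              # average over segments
    y = paddle.reshape(y, shape=[-1, feat])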
@@ -16,20 +16,19 @@ import os
import sys
import time
import argparse
import ast
import wget
import tarfile
import logging
import numpy as np
import paddle.fluid as fluid
import glob
from paddle.fluid.dygraph.base import to_variable
import ast
from model import TSN_ResNet
from utils.config_utils import *
from reader.ucf101_reader import UCF101Reader
import paddle
from paddle.io import DataLoader, DistributedBatchSampler
from compose import TSN_UCF101_Dataset
import paddle.nn.functional as F
logging.root.handlers = []
FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
@@ -127,11 +126,11 @@ def val(epoch, model, val_loader, cfg, args):
outputs = model(imgs)
loss = fluid.layers.cross_entropy(
loss = F.cross_entropy(
input=outputs, label=labels, ignore_index=-1)
avg_loss = fluid.layers.mean(loss)
acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1)
acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5)
avg_loss = paddle.mean(loss)
acc_top1 = paddle.metric.accuracy(input=outputs, label=labels, k=1)
acc_top5 = paddle.metric.accuracy(input=outputs, label=labels, k=5)
dy_out = avg_loss.numpy()[0]
total_loss += dy_out
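
The validation loop swaps fluid.layers.cross_entropy/mean/accuracy for F.cross_entropy, paddle.mean, and paddle.metric.accuracy. A self-contained sketch with random logits (101 classes, matching UCF101). One caveat worth verifying against the target Paddle release: if F.cross_entropy applies softmax internally, feeding it outputs that already went through the model's final F.softmax would apply softmax twice.

    import paddle
    import paddle.nn.functional as F

    outputs = paddle.rand([4, 101])
    labels = paddle.randint(0, 101, [4, 1])
    loss = F.cross_entropy(input=outputs, label=labels, ignore_index=-1)
    avg_loss = paddle.mean(loss)
    acc_top1 = paddle.metric.accuracy(input=outputs, label=labels, k=1)
    acc_top5 = paddle.metric.accuracy(input=outputs, label=labels, k=5)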
@@ -161,12 +160,12 @@ def create_optimizer(cfg, params):
l2_weight_decay = cfg.l2_weight_decay
momentum = cfg.momentum
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
optimizer = paddle.optimizer.Momentum(
learning_rate=paddle.optimizer.PiecewiseLR(
boundaries=bd, values=lr),
momentum=momentum,
regularization=fluid.regularizer.L2Decay(l2_weight_decay),
parameter_list=params)
weight_decay=paddle.regularizer.L2Decay(l2_weight_decay),
parameters=params)
return optimizer
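
The optimizer moves from fluid.optimizer.Momentum (regularization=, parameter_list=) to paddle.optimizer.Momentum (weight_decay=, parameters=), and the piecewise schedule becomes a scheduler object passed as learning_rate. A minimal sketch with made-up boundaries and values; PiecewiseLR is the 2.0-rc spelling used in this commit, renamed paddle.optimizer.lr.PiecewiseDecay in later releases:

    import paddle

    model = paddle.nn.Linear(10, 2)    # stand-in for the video model
    scheduler = paddle.optimizer.PiecewiseLR(
        boundaries=[10, 20], values=[0.01, 0.001, 0.0001])
    optimizer = paddle.optimizer.Momentum(
        learning_rate=scheduler,
        momentum=0.9,
        weight_decay=paddle.regularizer.L2Decay(1e-4),
        parameters=model.parameters())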
@@ -178,162 +177,155 @@ def train(args):
print_configs(train_config, 'Train')
use_data_parallel = args.use_data_parallel
trainer_count = fluid.dygraph.parallel.Env().nranks
paddle.disable_static(paddle.CUDAPlace(0))
# (data_parallel step1/6)
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
if use_data_parallel else fluid.CUDAPlace(0)
pre_state_dict = fluid.load_program_state(args.pretrain)
place = paddle.CUDAPlace(paddle.distributed.ParallelEnv().dev_id) \
if use_data_parallel else paddle.CUDAPlace(0)
if use_data_parallel:
with fluid.dygraph.guard(place):
if use_data_parallel:
# (data_parallel step2/6)
strategy = fluid.dygraph.parallel.prepare_context()
video_model = TSN_ResNet(train_config)
video_model = init_model(video_model, pre_state_dict)
optimizer = create_optimizer(train_config.TRAIN,
video_model.parameters())
paddle.distributed.init_parallel_env()
video_model = TSN_ResNet(train_config)
if use_data_parallel:
video_model = paddle.DataParallel(video_model)
pre_state_dict, _ = paddle.load(args.pretrain)
#if paddle.distributed.parallel.Env().local_rank == 0:
video_model = init_model(video_model, pre_state_dict)
if use_data_parallel:
# (data_parallel step3/6)
video_model = fluid.dygraph.parallel.DataParallel(video_model,
strategy)
optimizer = create_optimizer(train_config.TRAIN,
video_model.parameters())
bs_denominator = 1
if args.use_gpu:
bs_denominator = 1
if args.use_gpu:
# check number of GPUs
gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
if gpus == "":
pass
else:
gpus = gpus.split(",")
num_gpus = len(gpus)
bs_denominator = num_gpus
bs_train_single = int(train_config.TRAIN.batch_size / bs_denominator)
bs_val_single = int(valid_config.VALID.batch_size / bs_denominator)
train_dataset = TSN_UCF101_Dataset(train_config, 'train')
val_dataset = TSN_UCF101_Dataset(valid_config, 'valid')
train_sampler = DistributedBatchSampler(
gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
if gpus == "":
pass
else:
gpus = gpus.split(",")
num_gpus = len(gpus)
bs_denominator = num_gpus
bs_train_single = int(train_config.TRAIN.batch_size / bs_denominator)
bs_val_single = int(valid_config.VALID.batch_size / bs_denominator)
train_dataset = TSN_UCF101_Dataset(train_config, 'train')
val_dataset = TSN_UCF101_Dataset(valid_config, 'valid')
train_sampler = DistributedBatchSampler(
train_dataset,
batch_size=bs_train_single,
shuffle=train_config.TRAIN.use_shuffle,
drop_last=True)
train_loader = DataLoader(
train_loader = DataLoader(
train_dataset,
batch_sampler=train_sampler,
places=place,
num_workers=train_config.TRAIN.num_workers,
return_list=True)
val_sampler = DistributedBatchSampler(
val_sampler = DistributedBatchSampler(
val_dataset, batch_size=bs_val_single)
val_loader = DataLoader(
val_loader = DataLoader(
val_dataset,
batch_sampler=val_sampler,
places=place,
num_workers=valid_config.VALID.num_workers,
return_list=True)
if use_data_parallel:
# (data_parallel step4/6)
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
# resume training the model
if args.resume is not None:
model_state, opt_state = fluid.load_dygraph(args.resume)
video_model.set_dict(model_state)
optimizer.set_dict(opt_state)
for epoch in range(1, train_config.TRAIN.epoch + 1):
video_model.train()
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
batch_start = time.time()
for batch_id, data in enumerate(train_loader):
train_reader_cost = time.time() - batch_start
imgs = paddle.to_tensor(data[0])
labels = paddle.to_tensor(data[1])
labels.stop_gradient = True
outputs = video_model(imgs)
loss = fluid.layers.cross_entropy(
input=outputs, label=labels, ignore_index=-1)
avg_loss = fluid.layers.mean(loss)
acc_top1 = fluid.layers.accuracy(
# resume training the model
if args.resume is not None:
model_state, opt_state = paddle.load(args.resume)
video_model.set_dict(model_state)
optimizer.set_dict(opt_state)
for epoch in range(1, train_config.TRAIN.epoch + 1):
video_model.train()
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
batch_start = time.time()
for batch_id, data in enumerate(train_loader):
train_reader_cost = time.time() - batch_start
imgs = paddle.to_tensor(data[0], place=paddle.CUDAPinnedPlace())
labels = paddle.to_tensor(data[1], place=paddle.CUDAPinnedPlace())
labels.stop_gradient = True
outputs = video_model(imgs)
loss = F.cross_entropy(input=outputs, label=labels, ignore_index=-1)
avg_loss = paddle.mean(loss)
acc_top1 = paddle.metric.accuracy(
input=outputs, label=labels, k=1)
acc_top5 = fluid.layers.accuracy(
acc_top5 = paddle.metric.accuracy(
input=outputs, label=labels, k=5)
dy_out = avg_loss.numpy()[0]
dy_out = avg_loss.numpy()[0]
if use_data_parallel:
if use_data_parallel:
# (data_parallel step5/6)
avg_loss = video_model.scale_loss(avg_loss)
avg_loss.backward()
video_model.apply_collective_grads()
else:
avg_loss.backward()
optimizer.minimize(avg_loss)
video_model.clear_gradients()
total_loss += dy_out
total_acc1 += acc_top1.numpy()[0]
total_acc5 += acc_top5.numpy()[0]
total_sample += 1
train_batch_cost = time.time() - batch_start
print(
'TRAIN Epoch: {}, iter: {}, batch_cost: {:.5f} s, reader_cost: {:.5f} s, loss={:.6f}, acc1 {:.6f}, acc5 {:.6f} '.
format(epoch, batch_id, train_batch_cost, train_reader_cost,
total_loss / total_sample, total_acc1 / total_sample,
total_acc5 / total_sample))
batch_start = time.time()
avg_loss = video_model.scale_loss(avg_loss)
avg_loss.backward()
video_model.apply_collective_grads()
else:
avg_loss.backward()
optimizer.minimize(avg_loss)
optimizer.step()
optimizer.clear_grad()
total_loss += dy_out
total_acc1 += acc_top1.numpy()[0]
total_acc5 += acc_top5.numpy()[0]
total_sample += 1
train_batch_cost = time.time() - batch_start
print(
'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'.
format(epoch, total_loss / total_sample, total_acc1 /
total_sample, total_acc5 / total_sample))
'TRAIN Epoch: {}, iter: {}, batch_cost: {:.5f} s, reader_cost: {:.5f} s, loss={:.6f}, acc1 {:.6f}, acc5 {:.6f} '.
format(epoch, batch_id, train_batch_cost, train_reader_cost,
total_loss / total_sample, total_acc1 / total_sample,
total_acc5 / total_sample))
batch_start = time.time()
# save model's and optimizer's parameters, which are used for resuming training
save_parameters = (not use_data_parallel) or (
use_data_parallel and
fluid.dygraph.parallel.Env().local_rank == 0)
if save_parameters:
model_path_pre = "_tsn"
if not os.path.isdir(args.checkpoint):
os.makedirs(args.checkpoint)
model_path = os.path.join(
args.checkpoint,
"_" + model_path_pre + "_epoch{}".format(epoch))
fluid.dygraph.save_dygraph(video_model.state_dict(), model_path)
fluid.dygraph.save_dygraph(optimizer.state_dict(), model_path)
if args.validate:
video_model.eval()
val_acc = val(epoch, video_model, val_loader, valid_config,
args)
# save the best parameters in the training stage
if epoch == 1:
best_acc = val_acc
else:
if val_acc > best_acc:
best_acc = val_acc
if fluid.dygraph.parallel.Env().local_rank == 0:
if not os.path.isdir(args.weights):
os.makedirs(args.weights)
fluid.dygraph.save_dygraph(video_model.state_dict(),
args.weights + "/final")
print(
'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'.
format(epoch, total_loss / total_sample, total_acc1 /
total_sample, total_acc5 / total_sample))
# save model's and optimizer's parameters, which are used for resuming training
save_parameters = (not use_data_parallel) or (
use_data_parallel and
paddle.distributed.ParallelEnv().local_rank == 0)
if save_parameters:
model_path_pre = "_tsn"
if not os.path.isdir(args.checkpoint):
os.makedirs(args.checkpoint)
model_path = os.path.join(
args.checkpoint,
"_" + model_path_pre + "_epoch{}".format(epoch))
paddle.save(
video_model.state_dict(), model_path)
paddle.save(optimizer.state_dict(), model_path)
if args.validate:
video_model.eval()
val_acc = val(epoch, video_model, val_loader, valid_config, args)
# save the best parameters in the training stage
if epoch == 1:
best_acc = val_acc
else:
if fluid.dygraph.parallel.Env().local_rank == 0:
if not os.path.isdir(args.weights):
os.makedirs(args.weights)
fluid.dygraph.save_dygraph(video_model.state_dict(),
args.weights + "/final")
if val_acc > best_acc:
best_acc = val_acc
if paddle.distributed.ParallelEnv().local_rank == 0:
if not os.path.isdir(args.weights):
os.makedirs(args.weights)
paddle.save(video_model.state_dict(), args.weights + "/final")
else:
if paddle.distributed.ParallelEnv().local_rank == 0:
if not os.path.isdir(args.weights):
os.makedirs(args.weights)
paddle.save(video_model.state_dict(), args.weights + "/final")
logger.info('[TRAIN] training finished')
logger.info('[TRAIN] training finished')
if __name__ == "__main__":
......