Commit 920d45fc authored by S shippingwang

refine

Parent 87592366
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
......@@ -13,7 +13,6 @@
#limitations under the License.
import yaml
from .utility import AttrDict
import logging
logger = logging.getLogger(__name__)
......@@ -25,6 +24,17 @@ CONFIG_SECS = [
]
class AttrDict(dict):
def __getattr__(self, key):
return self[key]
def __setattr__(self, key, value):
if key in self.__dict__:
self.__dict__[key] = value
else:
self[key] = value
def parse_config(cfg_file):
"""Load a config file into AttrDict"""
import yaml
......
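For reference, a hedged sketch of how the helper above is typically consumed; the file name tsm.yaml is illustrative, and the attribute chains mirror how train.py and reader.py use the parsed config later in this diff.

# illustrative usage of parse_config / AttrDict defined in config_utils.py above
from config_utils import parse_config

cfg = parse_config('tsm.yaml')            # hypothetical config path
print(cfg.MODEL.num_layers)               # attribute-style access via AttrDict
print(cfg.MODEL.seg_num)                  # segments sampled per video clip
print(cfg['TRAIN']['batch_size'])         # plain dict indexing still works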
......@@ -20,20 +20,22 @@ from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
import math
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
name_scope,
#num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
def __init__(
self,
name_scope,
#num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__(name_scope)
self._conv = Conv2D(
self.full_name(),
# num_channels = num_channels,
# num_channels = num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
......@@ -43,10 +45,12 @@ class ConvBNLayer(fluid.dygraph.Layer):
param_attr=fluid.param_attr.ParamAttr(),
bias_attr=False)
self._batch_norm = BatchNorm(self.full_name(),
num_filters, act=act,
param_attr=fluid.param_attr.ParamAttr(),
bias_attr=fluid.param_attr.ParamAttr())
self._batch_norm = BatchNorm(
self.full_name(),
num_filters,
act=act,
param_attr=fluid.param_attr.ParamAttr(),
bias_attr=fluid.param_attr.ParamAttr())
def forward(self, inputs):
y = self._conv(inputs)
......@@ -54,32 +58,34 @@ class ConvBNLayer(fluid.dygraph.Layer):
return y
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
name_scope,
def __init__(
self,
name_scope,
# num_channels,
num_filters,
stride,
shortcut=True,
seg_num=8):
num_filters,
stride,
shortcut=True,
seg_num=8):
super(BottleneckBlock, self).__init__(name_scope)
self.conv0 = ConvBNLayer(
self.full_name(),
# num_channels=num_channels,
# num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu')
self.conv1 = ConvBNLayer(
self.full_name(),
# num_channels=num_filters,
# num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
self.conv2 = ConvBNLayer(
self.full_name(),
# num_channels=num_filters,
# num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
......@@ -87,7 +93,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
if not shortcut:
self.short = ConvBNLayer(
self.full_name(),
# num_channels=num_channels,
# num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
......@@ -103,33 +109,28 @@ class BottleneckBlock(fluid.dygraph.Layer):
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv2, act="relu")
return y
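The shortcut branch above reuses the input directly when shapes match and otherwise projects it with a 1x1 ConvBNLayer; the fusion itself is an elementwise add followed by ReLU. A minimal numpy sketch of that final step:

import numpy as np

def bottleneck_fuse(short, conv2):
    # numpy stand-in for fluid.layers.elementwise_add(x=short, y=conv2, act="relu")
    return np.maximum(short + conv2, 0.0)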
class TSM_ResNet(fluid.dygraph.Layer):
def __init__(self, name_scope, config):
super(TSM_ResNet, self).__init__(name_scope)
self.layers = config.MODEL.layers
self.layers = config.MODEL.num_layers
self.seg_num = config.MODEL.seg_num
self.class_dim = config.MODEL.class_dim
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
self.class_dim = config.MODEL.num_classes
if layers == 50:
if self.layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
else:
raise NotImplementedError
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
self.full_name(),
# num_channels=3,
# num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
......@@ -142,7 +143,7 @@ class TSM_ResNet(fluid.dygraph.Layer):
pool_type='max')
self.bottleneck_block_list = []
# num_channels = 64
# num_channels = 64
for block in range(len(depth)):
shortcut = False
......@@ -151,18 +152,16 @@ class TSM_ResNet(fluid.dygraph.Layer):
'bb_%d_%d' % (block, i),
BottleneckBlock(
self.full_name(),
# num_channels=num_channels,
# num_channels=num_channels,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut,
seg_num=seg_num))
# num_channels = int(bottleneck_block._num_channels_out)
seg_num=self.seg_num))
# num_channels = int(bottleneck_block._num_channels_out)
self.bottleneck_block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = Pool2D(
self.full_name(),
pool_size=7, pool_type='avg', global_pooling=True)
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
import math
stdv = 1.0 / math.sqrt(2048 * 1.0)
......@@ -173,17 +172,18 @@ class TSM_ResNet(fluid.dygraph.Layer):
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)),
bias_attr=fluid.param_attr.ParamAttr(
learning_rate=2.0,
regularizer=fluid.regularizer.L2Decay(0.)))
learning_rate=2.0,
regularizer=fluid.regularizer.L2Decay(0.)))
def forward(self, inputs):
y = fluid.layers.reshape(inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])
y = fluid.layers.reshape(
inputs, [-1, inputs.shape[2], inputs.shape[3], inputs.shape[4]])
y = self.conv(y)
y = self.pool2d_max(y)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
y = fluid.layers.dropout(y, dropout_prob=0.5)
y = fluid.layers.reshape(y, [-1, self.seg_num, y.shape[1]])
......
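TSM_ResNet.forward folds the segment dimension into the batch dimension so the 2D ResNet backbone treats every frame as an ordinary image, then unfolds it before classification. A hedged numpy sketch of that reshaping; averaging over segments after the second reshape is an assumption about the elided lines.

import numpy as np

N, seg_num, C, H, W = 2, 8, 3, 224, 224
clips = np.zeros((N, seg_num, C, H, W), dtype='float32')     # a batch of video clips

# fold: [-1, C, H, W], as in the first fluid.layers.reshape above
frames = clips.reshape(-1, C, H, W)                          # [N * seg_num, C, H, W]

feat_dim = 2048                                              # ResNet-50 final channel count
features = np.zeros((frames.shape[0], feat_dim), 'float32')  # stand-in for conv + avg pool

# unfold: [-1, seg_num, feat_dim], as in the second reshape above,
# then (assumption) average over segments before the final FC layer
clip_features = features.reshape(N, seg_num, feat_dim).mean(axis=1)   # [N, feat_dim]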
......@@ -53,19 +53,19 @@ class KineticsReader():
"""
def __init__(self, name, mode, cfg):
super(KineticsReader, self).__init__(name, mode, cfg)
self.mode = mode
self.name = name
self.format = cfg.MODEL.format
self.num_classes = self.get_config_from_sec('model', 'num_classes')
self.seg_num = self.get_config_from_sec('model', 'seg_num')
self.seglen = self.get_config_from_sec('model', 'seglen')
self.seg_num = self.get_config_from_sec(mode, 'seg_num', self.seg_num)
self.short_size = self.get_config_from_sec(mode, 'short_size')
self.target_size = self.get_config_from_sec(mode, 'target_size')
self.num_reader_threads = self.get_config_from_sec(mode,
'num_reader_threads')
self.buf_size = self.get_config_from_sec(mode, 'buf_size')
self.fix_random_seed = self.get_config_from_sec(mode, 'fix_random_seed')
self.num_classes = cfg.MODEL.num_classes
self.seg_num = cfg.MODEL.seg_num
self.seglen = cfg.MODEL.seglen
# self.seg_num = cfg[mode.upper()]['seg_num']
self.short_size = cfg[mode.upper()]['short_size']
self.target_size = cfg[mode.upper()]['target_size']
self.num_reader_threads = cfg[mode.upper()]['num_reader_threads']
self.buf_size = cfg[mode.upper()]['buf_size']
self.img_mean = np.array(cfg.MODEL.image_mean).reshape(
[3, 1, 1]).astype(np.float32)
......@@ -78,10 +78,6 @@ class KineticsReader():
self.video_path = cfg[mode.upper()]['video_path']
else:
self.video_path = ''
if self.fix_random_seed:
random.seed(0)
np.random.seed(0)
self.num_reader_threads = 1
def create_reader(self):
# if set video_path for inference mode, just load this single video
......@@ -318,7 +314,6 @@ def group_multi_scale_crop(img_group, target_size, scales=None, \
w_offset, h_offset = random.choice(ret)
return crop_pair[0], crop_pair[1], w_offset, h_offset
crop_w, crop_h, offset_w, offset_h = _sample_crop_size(im_size)
......
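After this change the reader pulls its settings straight off the parsed config: model-wide values from cfg.MODEL and per-mode values from the section named after the mode. A hedged sketch of those lookups, reusing the cfg object from the parse_config sketch above:

mode = 'train'                                   # or 'valid' / 'test' / 'infer'
seg_num = cfg.MODEL.seg_num                      # shared, model-level settings
num_classes = cfg.MODEL.num_classes
short_size = cfg[mode.upper()]['short_size']     # per-mode settings under TRAIN / VALID / ...
target_size = cfg[mode.upper()]['target_size']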
......@@ -16,7 +16,6 @@ resume="" # set pretrain model path if needed
save_dir="./data/checkpoints"
save_inference_dir="./data/inference_model"
use_gpu=True
fix_random_seed=False
log_interval=1
valid_interval=1
......@@ -36,8 +35,7 @@ if [ "$mode"x == "train"x ]; then
--log_interval=$log_interval \
--valid_interval=$valid_interval \
--use_gpu=$use_gpu \
--save_dir=$save_dir \
--fix_random_seed=$fix_random_seed
--save_dir=$save_dir
elif [ "$pretrain"x != ""x ]; then
python train.py --model_name=$name \
--config=$configs \
......@@ -45,17 +43,14 @@ if [ "$mode"x == "train"x ]; then
--log_interval=$log_interval \
--valid_interval=$valid_interval \
--use_gpu=$use_gpu \
--save_dir=$save_dir \
--fix_random_seed=$fix_random_seed
--save_dir=$save_dir
else
nohup python train.py --model_name=$name \
python train.py --model_name=$name \
--config=$configs \
--log_interval=$log_interval \
--valid_interval=$valid_interval \
--use_gpu=$use_gpu \
--save_dir=$save_dir \
--fix_random_seed=$fix_random_seed \
>dy_debug.log 2>&1 &
--save_dir=$save_dir
fi
elif [ "$mode"x == "eval"x ]; then
echo $mode $name $configs $weights
......
......@@ -23,7 +23,7 @@ import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from model import TSM_ResNet
from utils.config_utils import *
from config_utils import *
from reader import KineticsReader
logging.root.handlers = []
......@@ -31,13 +31,11 @@ FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout)
logger = logging.getLogger(__name__)
def parse_args():
parser = argparse.ArgumentParser("Paddle Video train script")
parser.add_argument(
'--model_name',
type=str,
default='TSM',
help='name of model to train.')
'--model_name', type=str, default='TSM', help='name of model to train.')
parser.add_argument(
'--config',
type=str,
......@@ -100,7 +98,7 @@ def parse_args():
def val(epoch, model, cfg, args):
reader = KineticsReader(name = "tsm", mode="valid", cfg =cfg)
reader = KineticsReader(name="tsm", mode="valid", cfg=cfg)
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
......@@ -112,10 +110,11 @@ def val(epoch, model, cfg, args):
imgs = to_variable(x_data)
labels = to_variable(y_data)
labels.stop_gradient = True
outputs = model(imgs)
loss = fluid.layers.cross_entropy(input=outputs, label=labels, ignore_index=-1)
loss = fluid.layers.cross_entropy(
input=outputs, label=labels, ignore_index=-1)
avg_loss = fluid.layers.mean(loss)
acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1)
acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5)
......@@ -125,28 +124,33 @@ def val(epoch, model, cfg, args):
total_acc5 += acc_top5.numpy()[0]
total_sample += 1
print('TEST Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.format(epoch, batch_id, avg_loss.numpy()[0], acc_top1.numpy()[0], acc_top5.numpy()[0]))
print('TEST Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.format(
epoch, batch_id,
avg_loss.numpy()[0], acc_top1.numpy()[0], acc_top5.numpy()[0]))
print('Finish loss {} , acc1 {} , acc5 {}'.format(total_loss/ total_sample, total_acc1 / total_sample, total_acc5 / total_sample))
print('Finish loss {} , acc1 {} , acc5 {}'.format(
total_loss / total_sample, total_acc1 / total_sample, total_acc5 /
total_sample))
def optimizer(cfg):
def create_optimizer(cfg):
total_videos = cfg.total_videos
step = int(total_videos / cfg.batch_size + 1)
bd = [e * step for e in cfg.decay_epochs]
base_lr = cfg.base_learning_rate
base_lr = cfg.learning_rate
lr_decay = cfg.learning_rate_decay
lr = [base_lr, base_lr * lr_decay, base_lr * lr_decay * lr_decay]
l2_weight_decay = cfg.l2_weight_decay
momentum = cfg.momentum
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum,
regularization=fluid.regularizer.L2Decay(l2_weight_decay))
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum,
regularization=fluid.regularizer.L2Decay(l2_weight_decay))
return optimizer
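create_optimizer turns the epoch-based decay schedule from the TRAIN section into iteration boundaries for piecewise_decay. A hedged numeric sketch; total_videos matches the YAML hunk below, the other values are illustrative.

total_videos = 8000            # from the TRAIN section in the YAML below
batch_size = 16                # illustrative
decay_epochs = [40, 60]        # illustrative
learning_rate = 0.01           # illustrative
learning_rate_decay = 0.1      # illustrative

step = int(total_videos / batch_size + 1)   # iterations per epoch
bd = [e * step for e in decay_epochs]       # decay boundaries, in iterations
lr = [learning_rate,
      learning_rate * learning_rate_decay,
      learning_rate * learning_rate_decay ** 2]   # one learning rate per interval
# these bd / lr lists feed fluid.layers.piecewise_decay inside the Momentum optimizer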
def train(args):
config = parse_config(args.config)
train_config = merge_configs(config, 'train', vars(args))
......@@ -154,7 +158,6 @@ def train(args):
print_configs(train_config, 'Train')
#train_model = models.get_model(args.model_name, train_config, mode='train')
use_data_parallel = False
trainer_count = fluid.dygraph.parallel.Env().nranks
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
......@@ -164,11 +167,12 @@ def train(args):
if use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context()
video_model = TSM_ResNet(train_config)
optimizer = optimizer(train_config.MODEL)
video_model = TSM_ResNet("", train_config)
optimizer = create_optimizer(train_config.TRAIN)
if use_data_parallel:
video_model = fluid.dygraph.parallel.DataParallel(video_model, strategy)
video_model = fluid.dygraph.parallel.DataParallel(video_model,
strategy)
bs_denominator = 1
if args.use_gpu:
# check number of GPUs
......@@ -188,27 +192,35 @@ def train(args):
train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
bs_denominator)
train_reader = KineticsReader(name="tsm", mode="train", cfg=train_config)
valid_reader = get_reader(args.model_name.upper(), 'valid', valid_config)
train_reader = KineticsReader(
name="tsm", mode="train", cfg=train_config)
valid_reader = KineticsReader(
name="tsm", mode="valid", cfg=valid_config)
train_reader = train_reader.create_reader()
valid_reader = valid_reader.create_reader()
if use_data_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
for epoch in range(args.epochs):
for epoch in range(train_config.TRAIN.epoch):
video_model.train()
for batch_id, data in enumerate(train_reader()):
x_data = np.array([item[0] for item in data])
y_data = np.array([item[1] for item in data]).reshape([-1, 1])
imgs = to_variable(x_data)
labels = to_variable(y_data)
labels.stop_gradient = True
outputs = video_model(imgs)
loss = fluid.layers.cross_entropy(input=outputs, label=labels, ignore_index=-1)
loss = fluid.layers.cross_entropy(
input=outputs, label=labels, ignore_index=-1)
avg_loss = fluid.layers.mean(loss)
acc_top1 = fluid.layers.accuracy(input=outputs, label=labels, k=1)
acc_top5 = fluid.layers.accuracy(input=outputs, label=labels, k=5)
acc_top1 = fluid.layers.accuracy(
input=outputs, label=labels, k=1)
acc_top5 = fluid.layers.accuracy(
input=outputs, label=labels, k=5)
loss_array = avg_loss.numpy()
if use_data_parallel:
......@@ -220,11 +232,13 @@ def train(args):
optimizer.minimize(avg_loss)
video_model.clear_gradients()
print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.format(epoch, batch_id, loss_array[0], acc_top1.numpy()[0], acc_top5.numpy()[0]))
print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.
format(epoch, batch_id, loss_array[0],
acc_top1.numpy()[0], acc_top5.numpy()[0]))
video_model.eval()
val(epoch, video_model, valid_config, args)
if fluid.dygraph.parallel.Env().local_rank == 0:
save_model_name = os.path.join("final")
fluid.dygraph.save_dygraph(video_model.state_dict(), "final")
......
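train() ends by writing the parameters with fluid.dygraph.save_dygraph under the name "final". A hedged sketch of loading that checkpoint back for evaluation; load_dygraph and set_dict are the usual Paddle 1.x dygraph counterparts, but treat the exact calls and the config handling as assumptions.

import paddle.fluid as fluid
from model import TSM_ResNet
from config_utils import parse_config

with fluid.dygraph.guard():
    cfg = parse_config('tsm.yaml')                        # hypothetical config path
    model = TSM_ResNet("", cfg)                           # same constructor call as in train()
    state_dict, _ = fluid.dygraph.load_dygraph("final")   # counterpart of save_dygraph
    model.set_dict(state_dict)
    model.eval()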
......@@ -25,7 +25,6 @@ TRAIN:
l2_weight_decay: 1e-4
momentum: 0.9
total_videos: 8000 #239781
fix_random_seed: False
VALID:
short_size: 256
......