未验证 提交 a0ed96af 编写于 作者: R ruri 提交者: GitHub

remove inplace=True and pool_size when using global pooling (#3298)

* remove use_inplace=True and add save_step params

* remove useless params when using global pooling
上级 1d494f87
......@@ -96,9 +96,10 @@ bash run.sh train 模型名
* **data_dir**: 数据存储路径,默认值: "./data/ILSVRC2012/"
* **model_save_dir**: 模型存储路径,默认值: "output/"
* **save_param**: params存储路径,默认值: None
* **pretrained_model**: 加载预训练模型路径,默认值: None
* **checkpoint**: 加载用于继续训练的检查点(指定具体模型存储路径,如"output/ResNet50/100/"),默认值: None
* **print_step**: 打印训练信息的batch步数,默认值:10
* **save_step**: 保存模型的epoch步数,默认值:1
模型类型和超参配置:
......@@ -134,7 +135,6 @@ bash run.sh train 模型名
一些开关:
* **use_gpu**: 是否在GPU上运行,默认值: True
* **use_inplace**: 是否开启inplace显存优化,默认值: True
* **use_label_smoothing**: 是否对数据进行label smoothing处理,默认值: False
* **label_smoothing_epsilon**: label_smoothing的epsilon, 默认值:0.2
* **random_seed**: 随机数种子, 默认值: 1000
......@@ -207,7 +207,11 @@ FP16相关内容已经迁移至PaddlePaddle/Fleet 中
PaddlePaddle/Models ImageClassification 支持自定义数据
1. 组织自定义数据,调整数据读取器以正确的传入数据
2. 注意更改训练脚本中 --data_dim --total_image 等参数
2. 注意更改训练脚本中
--data_dim 类别数为自定义数据类别数
--total_image 图片数量
3. 当进行finetune时,
指定--pretrained_model 加载预训练模型,注意:本模型库提供的是基于ImageNet 1000类数据的预训练模型,当使用不同类别数的数据时,请删除预训练模型中fc_weight 和fc_offset参数
## 已发布模型及其性能
......
......@@ -88,9 +88,10 @@ Environment settings:
* **data_dir**: the data root directory Default: "./data/ILSVRC2012".
* **model_save_dir**: the directory to save trained model. Default: "output".
* **save_param**: the path to save params. Default: None.
* **pretrained_model**: load model path for pretraining. Default: None.
* **checkpoint**: load the checkpoint path to resume. Default: None.
* **print_step**: the batch steps interval to print log. Default: 10.
* **save_step**: the epoch steps interval to save checkpoints. Default: 1.
Solver and hyperparameters:
......@@ -105,7 +106,7 @@ Solver and hyperparameters:
* **lr**: initialized learning rate. Default: 0.1.
* **l2_decay**: L2_decay parameter. Default: 1e-4.
* **momentum_rate**: momentum_rate. Default: 0.9.
* **step_epochs**: piecewise dacay的decay step, Default: [30,60,90]
* **step_epochs**: the decay steps of piecewise decay, Default: [30,60,90]
Reader and preprocess:
......@@ -126,7 +127,6 @@ Reader and preprocess:
Switch:
* **use_gpu**: whether to use GPU or not. Default: True.
* **use_inplace**: whether to use inplace memory optimization or not. Default: True.
* **use_label_smoothing**: whether to use label_smoothing or not. Default: False.
* **label_smoothing_epsilon**: the label_smoothing_epsilon. Default: 0.2.
* **random_seed**: random seed for debugging. Default: 1000.
......
......@@ -79,15 +79,14 @@ class DistResNet():
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(input=pool,
out = fluid.layers.fc(
input=pool,
size=class_dim,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv,
stdv),
initializer=fluid.initializer.Uniform(-stdv, stdv),
regularizer=fluid.regularizer.L2Decay(self.weight_decay)),
bias_attr=fluid.ParamAttr(
regularizer=fluid.regularizer.L2Decay(self.weight_decay))
)
regularizer=fluid.regularizer.L2Decay(self.weight_decay)))
return out
def conv_bn_layer(self,
......@@ -107,9 +106,12 @@ class DistResNet():
groups=groups,
act=None,
bias_attr=False,
param_attr=fluid.ParamAttr(regularizer=fluid.regularizer.L2Decay(self.weight_decay)))
param_attr=fluid.ParamAttr(
regularizer=fluid.regularizer.L2Decay(self.weight_decay)))
return fluid.layers.batch_norm(
input=conv, act=act, is_test=not self.is_train,
input=conv,
act=act,
is_test=not self.is_train,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(bn_init_value),
regularizer=None))
......@@ -132,9 +134,12 @@ class DistResNet():
act='relu')
# NOTE: default bias is 0.0 already
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, bn_init_value=0.0)
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
bn_init_value=0.0)
short = self.shortcut(input, num_filters * 4, stride)
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
......@@ -22,7 +22,6 @@ from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc
from .resnet_vd import ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd
from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d, ResNeXt50_32x4d, ResNeXt101_32x4d, ResNeXt152_32x4d
from .resnext_vd import ResNeXt50_vd_64x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_64x4d, ResNeXt50_vd_32x4d, ResNeXt101_vd_32x4d, ResNeXt152_vd_32x4d
from .resnet_dist import DistResNet
from .inception_v4 import InceptionV4
from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d
from .se_resnext_vd import SE_ResNeXt50_32x4d_vd, SE_ResNeXt101_32x4d_vd, SE_154_vd
......
......@@ -29,6 +29,7 @@ import utils
__all__ = ["FastImageNet"]
class FastImageNet():
def __init__(self, layers=50, is_train=True):
self.layers = layers
......@@ -65,8 +66,9 @@ class FastImageNet():
stride=2 if i == 0 and block != 0 else 1)
pool_size = int(img_size / 32)
pool = fluid.layers.pool2d(
input=conv, pool_size=pool_size, pool_type='avg', global_pooling=True)
out = fluid.layers.fc(input=pool,
input=conv, pool_type='avg', global_pooling=True)
out = fluid.layers.fc(
input=pool,
size=class_dim,
act=None,
param_attr=fluid.param_attr.ParamAttr(
......@@ -93,8 +95,12 @@ class FastImageNet():
groups=groups,
act=None,
bias_attr=False,
param_attr=fluid.ParamAttr(regularizer=fluid.regularizer.L2Decay(1e-4)))
return fluid.layers.batch_norm(input=conv, act=act, is_test=not self.is_train,
param_attr=fluid.ParamAttr(
regularizer=fluid.regularizer.L2Decay(1e-4)))
return fluid.layers.batch_norm(
input=conv,
act=act,
is_test=not self.is_train,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Constant(bn_init_value),
regularizer=None))
......@@ -117,12 +123,17 @@ class FastImageNet():
act='relu')
# init bn-weight0
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, bn_init_value=0.0)
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
bn_init_value=0.0)
short = self.shortcut(input, num_filters * 4, stride)
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def lr_decay(lrs, epochs, bs, total_image):
boundaries = []
values = []
......@@ -130,7 +141,7 @@ def lr_decay(lrs, epochs, bs, total_image):
step = total_image // bs[idx]
if step * bs[idx] < total_image:
step += 1
ratio = (lrs[idx][1] - lrs[idx][0])*1.0 / (epoch[1] - epoch[0])
ratio = (lrs[idx][1] - lrs[idx][0]) * 1.0 / (epoch[1] - epoch[0])
lr_base = lrs[idx][0]
for s in range(epoch[0], epoch[1]):
if boundaries:
......@@ -139,7 +150,9 @@ def lr_decay(lrs, epochs, bs, total_image):
boundaries = [step]
lr = lr_base + ratio * (s - epoch[0])
values.append(lr)
print("epoch: [%d], steps: [%d], lr: [%f]" % (s, boundaries[-1], values[-1]))
print("epoch: [%d], steps: [%d], lr: [%f]" %
(s, boundaries[-1], values[-1]))
values.append(lrs[-1])
print("epoch: [%d:], steps: [%d:], lr:[%f]" % (epochs[-1][-1], boundaries[-1], values[-1]))
print("epoch: [%d:], steps: [%d:], lr:[%f]" %
(epochs[-1][-1], boundaries[-1], values[-1]))
return boundaries, values
......@@ -45,7 +45,7 @@ class InceptionV4():
x = self.inceptionC(x, name=str(i + 1))
pool = fluid.layers.pool2d(
input=x, pool_size=8, pool_type='avg', global_pooling=True)
input=x, pool_type='avg', global_pooling=True)
drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
......
......@@ -125,11 +125,7 @@ class MobileNet():
name="conv6")
input = fluid.layers.pool2d(
input=input,
pool_size=0,
pool_stride=1,
pool_type='avg',
global_pooling=True)
input=input, pool_type='avg', global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
......
......@@ -89,11 +89,7 @@ class MobileNetV2():
name='conv9')
input = fluid.layers.pool2d(
input=input,
pool_size=7,
pool_stride=1,
pool_type='avg',
global_pooling=True)
input=input, pool_type='avg', global_pooling=True)
output = fluid.layers.fc(input=input,
size=class_dim,
......
......@@ -77,7 +77,7 @@ class ResNet():
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
......@@ -96,7 +96,7 @@ class ResNet():
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
......
......@@ -57,11 +57,26 @@ class ResNet():
num_filters = [64, 128, 256, 512]
conv = self.conv_bn_layer(
input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1')
input=input,
num_filters=32,
filter_size=3,
stride=2,
act='relu',
name='conv1_1')
conv = self.conv_bn_layer(
input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2')
input=conv,
num_filters=32,
filter_size=3,
stride=1,
act='relu',
name='conv1_2')
conv = self.conv_bn_layer(
input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3')
input=conv,
num_filters=64,
filter_size=3,
stride=1,
act='relu',
name='conv1_3')
conv = fluid.layers.pool2d(
input=conv,
......@@ -74,18 +89,19 @@ class ResNet():
for i in range(depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name="res"+str(block+2)+"a"
conv_name = "res" + str(block + 2) + "a"
else:
conv_name="res"+str(block+2)+"b"+str(i)
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name="res"+str(block+2)+chr(97+i)
conv_name = "res" + str(block + 2) + chr(97 + i)
conv = self.bottleneck_block(
input=conv,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,name=conv_name)
stride=2 if i == 0 and block != 0 else 1,
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(input=pool,
size=class_dim,
......@@ -117,13 +133,14 @@ class ResNet():
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
return fluid.layers.batch_norm(input=conv,
return fluid.layers.batch_norm(
input=conv,
act=act,
name=bn_name+'.output.1',
name=bn_name + '.output.1',
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',)
moving_variance_name=bn_name + '_variance', )
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
......@@ -134,20 +151,30 @@ class ResNet():
def bottleneck_block(self, input, num_filters, stride, name):
conv0 = self.conv_bn_layer(
input=input, num_filters=num_filters, filter_size=1, act='relu',name=name+"_branch2a")
input=input,
num_filters=num_filters,
filter_size=1,
act='relu',
name=name + "_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu',
name=name+"_branch2b")
name=name + "_branch2b")
conv2 = self.conv_bn_layer(
input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c")
input=conv1,
num_filters=num_filters * 4,
filter_size=1,
act=None,
name=name + "_branch2c")
short = self.shortcut(input, num_filters * 4, stride, name=name + "_branch1")
short = self.shortcut(
input, num_filters * 4, stride, name=name + "_branch1")
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu',name=name+".add.output.5")
return fluid.layers.elementwise_add(
x=short, y=conv2, act='relu', name=name + ".add.output.5")
def ResNet50_vc():
......
......@@ -102,7 +102,7 @@ class ResNet():
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
......
......@@ -82,7 +82,7 @@ class ResNeXt():
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
......
......@@ -65,7 +65,7 @@ class ResNeXt101_wsl():
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
......
......@@ -102,7 +102,7 @@ class ResNeXt():
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
out = fluid.layers.fc(
input=pool,
......
......@@ -120,11 +120,7 @@ class SE_ResNeXt():
name=str(n) + '_' + str(i + 1))
pool = fluid.layers.pool2d(
input=conv,
pool_size=7,
pool_type='avg',
global_pooling=True,
use_cudnn=False)
input=conv, pool_type='avg', global_pooling=True, use_cudnn=False)
drop = fluid.layers.dropout(x=pool, dropout_prob=0.5)
stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0)
out = fluid.layers.fc(
......@@ -215,11 +211,7 @@ class SE_ResNeXt():
reduction_ratio,
name=None):
pool = fluid.layers.pool2d(
input=input,
pool_size=0,
pool_type='avg',
global_pooling=True,
use_cudnn=False)
input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(
input=pool,
......
......@@ -146,7 +146,7 @@ class SE_ResNeXt():
name=str(n) + '_' + str(i + 1))
pool = fluid.layers.pool2d(
input=conv, pool_size=7, pool_type='avg', global_pooling=True)
input=conv, pool_type='avg', global_pooling=True)
if layers == 152:
pool = fluid.layers.dropout(x=pool, dropout_prob=0.2)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
......@@ -289,7 +289,7 @@ class SE_ResNeXt():
reduction_ratio,
name=None):
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
input=input, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
squeeze = fluid.layers.fc(
input=pool,
......
......@@ -100,10 +100,8 @@ def train(args):
args=args)
train_py_reader = train_out[-1]
train_fetch_vars = train_out[:-1]
train_fetch_list = []
for var in train_fetch_vars:
var.persistable = True
train_fetch_list.append(var.name)
train_fetch_list = [var.name for var in train_fetch_vars]
test_out = build_program(
is_train=False,
......@@ -112,10 +110,8 @@ def train(args):
args=args)
test_py_reader = test_out[-1]
test_fetch_vars = test_out[:-1]
test_fetch_list = []
for var in test_fetch_vars:
var.persistable = True
test_fetch_list.append(var.name)
test_fetch_list = [var.name for var in test_fetch_vars]
#Create test_prog and set layers' is_test params to True
test_prog = test_prog.clone(for_test=True)
......@@ -208,6 +204,7 @@ def train(args):
list(train_epoch_metrics_avg) + list(test_epoch_metrics_avg),
0, "epoch")
#For now, save model per epoch.
if pass_id % args.save_step == 0:
save_model(args, exe, train_prog, pass_id)
......
......@@ -88,10 +88,8 @@ def parse_args():
add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet dataset root directory.")
add_arg('pretrained_model', str, None, "Whether to load pretrained model.")
add_arg('checkpoint', str, None, "Whether to resume checkpoint.")
add_arg('save_params', str, "./output", "Whether to save params.")
add_arg('print_step', int, 10, "The steps interval to print logs")
add_arg('save_step', int, 100, "The steps interval to save checkpoints")
add_arg('save_step', int, 1, "The steps interval to save checkpoints")
# SOLVER AND HYPERPARAMETERS
add_arg('model', str, "ResNet50", "The name of network.")
......@@ -121,7 +119,6 @@ def parse_args():
parser.add_argument('--image_std', nargs='+', type=float, default=[0.229, 0.224, 0.225], help="The std of input image data")
# SWITCH
add_arg('use_inplace', bool, True, "Whether to use inplace memory optimization.")
#NOTE: (2019/08/08) FP16 is moving to PaddlePaddle/Fleet now
#add_arg('use_fp16', bool, False, "Whether to enable half precision training with fp16." )
#add_arg('scale_loss', float, 1.0, "The value of scale_loss for fp16." )
......@@ -371,7 +368,7 @@ def best_strategy_compiled(args, program, loss):
return program
else:
build_strategy = fluid.compiler.BuildStrategy()
build_strategy.enable_inplace = args.use_inplace
build_strategy.enable_inplace = True
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = fluid.core.get_cuda_device_count()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册