Commit 80cbdf27 authored by chajchaj, committed by ruri

Update to Paddle 1.7 (#4216)

Parent c98dbafd
......@@ -23,7 +23,7 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
import math
......@@ -32,7 +32,7 @@ import sys
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
filter_size,
num_filters,
stride,
......@@ -42,10 +42,10 @@ class ConvBNLayer(fluid.dygraph.Layer):
act='relu',
use_cudnn=True,
name=None):
super(ConvBNLayer, self).__init__(name_scope)
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
......@@ -58,13 +58,12 @@ class ConvBNLayer(fluid.dygraph.Layer):
bias_attr=False)
self._batch_norm = BatchNorm(
self.full_name(),
num_filters,
act=act,
param_attr=ParamAttr(name="_bn" + "_scale"),
bias_attr=ParamAttr(name="_bn" + "_offset"),
moving_mean_name="_bn" + '_mean',
moving_variance_name="_bn" + '_variance')
param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"),
bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"),
moving_mean_name=self.full_name() + "_bn" + '_mean',
moving_variance_name=self.full_name() + "_bn" + '_variance')
def forward(self, inputs):
y = self._conv(inputs)
......@@ -74,17 +73,17 @@ class ConvBNLayer(fluid.dygraph.Layer):
class DepthwiseSeparable(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
name=None):
super(DepthwiseSeparable, self).__init__(name_scope)
super(DepthwiseSeparable, self).__init__()
self._depthwise_conv = ConvBNLayer(
name_scope="dw",
num_channels=num_channels,
num_filters=int(num_filters1 * scale),
filter_size=3,
stride=stride,
......@@ -93,7 +92,7 @@ class DepthwiseSeparable(fluid.dygraph.Layer):
use_cudnn=False)
self._pointwise_conv = ConvBNLayer(
name_scope="sep",
num_channels=int(num_filters1 * scale),
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
......@@ -106,13 +105,13 @@ class DepthwiseSeparable(fluid.dygraph.Layer):
class MobileNetV1(fluid.dygraph.Layer):
def __init__(self, name_scope, scale=1.0, class_dim=102):
super(MobileNetV1, self).__init__(name_scope)
def __init__(self, scale=1.0, class_dim=1000):
super(MobileNetV1, self).__init__()
self.scale = scale
self.dwsl = []
self.conv1 = ConvBNLayer(
name_scope="conv1",
num_channels=3,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
......@@ -121,7 +120,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws21 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv2_1",
num_channels=int(32 * scale),
num_filters1=32,
num_filters2=64,
num_groups=32,
......@@ -132,7 +131,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws22 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv2_2",
num_channels=int(64 * scale),
num_filters1=64,
num_filters2=128,
num_groups=64,
......@@ -143,7 +142,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws31 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv3_1",
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=128,
num_groups=128,
......@@ -154,7 +153,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws32 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv3_2",
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=256,
num_groups=128,
......@@ -165,7 +164,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws41 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv4_1",
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=256,
num_groups=256,
......@@ -176,7 +175,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws42 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv4_2",
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=512,
num_groups=256,
......@@ -188,7 +187,7 @@ class MobileNetV1(fluid.dygraph.Layer):
for i in range(5):
tmp = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv5_" + str(i + 1),
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=512,
num_groups=512,
......@@ -199,7 +198,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws56 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv5_6",
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=1024,
num_groups=512,
......@@ -210,7 +209,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws6 = self.add_sublayer(
sublayer=DepthwiseSeparable(
name_scope="conv6",
num_channels=int(1024 * scale),
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
......@@ -219,21 +218,20 @@ class MobileNetV1(fluid.dygraph.Layer):
name="conv6")
self.dwsl.append(dws6)
self.pool2d_avg = Pool2D(
name_scope="pool", pool_type='avg', global_pooling=True)
self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
self.out = FC(name_scope="fc",
size=class_dim,
param_attr=ParamAttr(
initializer=MSRA(),
name=self.full_name() + "fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset"))
self.out = Linear(
int(1024 * scale),
class_dim,
param_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset"))
def forward(self, inputs):
y = self.conv1(inputs)
idx = 0
for dws in self.dwsl:
y = dws(y)
y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, 1024])
y = self.out(y)
return y
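
With these changes the dygraph layers no longer take a name_scope argument, and the classifier becomes a Linear layer whose input width (int(1024 * scale)) is declared up front, which is why forward() now reshapes the pooled tensor before self.out. A minimal sketch of exercising the updated MobileNetV1 under Paddle 1.7 dygraph; the batch size and input shape are illustrative, not taken from the diff:

import numpy as np
import paddle.fluid as fluid

# Hedged usage sketch for the Paddle 1.7 dygraph API; shapes are examples only.
with fluid.dygraph.guard():
    net = MobileNetV1(scale=1.0, class_dim=1000)
    img = fluid.dygraph.to_variable(
        np.random.random((4, 3, 224, 224)).astype('float32'))
    logits = net(img)  # expected shape: [4, 1000]
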
......@@ -25,7 +25,7 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
......@@ -36,19 +36,19 @@ import sys
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
name=None,
use_cudnn=True):
super(ConvBNLayer, self).__init__(name)
super(ConvBNLayer, self).__init__()
tmp_param = ParamAttr(name=name + "_weights")
tmp_param = ParamAttr(name=self.full_name() + "_weights")
self._conv = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
......@@ -60,12 +60,11 @@ class ConvBNLayer(fluid.dygraph.Layer):
bias_attr=False)
self._batch_norm = BatchNorm(
self.full_name(),
num_filters,
param_attr=ParamAttr(name=name + "_bn" + "_scale"),
bias_attr=ParamAttr(name=name + "_bn" + "_offset"),
moving_mean_name=name + "_bn" + '_mean',
moving_variance_name=name + "_bn" + '_variance')
param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"),
bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"),
moving_mean_name=self.full_name() + "_bn" + '_mean',
moving_variance_name=self.full_name() + "_bn" + '_variance')
def forward(self, inputs, if_act=True):
y = self._conv(inputs)
......@@ -76,18 +75,19 @@ class ConvBNLayer(fluid.dygraph.Layer):
class InvertedResidualUnit(fluid.dygraph.Layer):
def __init__(self,
num_in_filter,
num_filters,
stride,
filter_size,
padding,
expansion_factor,
name=None):
super(InvertedResidualUnit, self).__init__(name)
def __init__(
self,
num_channels,
num_in_filter,
num_filters,
stride,
filter_size,
padding,
expansion_factor, ):
super(InvertedResidualUnit, self).__init__()
num_expfilter = int(round(num_in_filter * expansion_factor))
self._expand_conv = ConvBNLayer(
name=name + "_expand",
num_channels=num_channels,
num_filters=num_expfilter,
filter_size=1,
stride=1,
......@@ -95,7 +95,7 @@ class InvertedResidualUnit(fluid.dygraph.Layer):
num_groups=1)
self._bottleneck_conv = ConvBNLayer(
name=name + "_dwise",
num_channels=num_expfilter,
num_filters=num_expfilter,
filter_size=filter_size,
stride=stride,
......@@ -104,7 +104,7 @@ class InvertedResidualUnit(fluid.dygraph.Layer):
use_cudnn=False)
self._linear_conv = ConvBNLayer(
name=name + "_linear",
num_channels=num_expfilter,
num_filters=num_filters,
filter_size=1,
stride=1,
......@@ -121,11 +121,11 @@ class InvertedResidualUnit(fluid.dygraph.Layer):
class InvresiBlocks(fluid.dygraph.Layer):
def __init__(self, in_c, t, c, n, s, name=None):
super(InvresiBlocks, self).__init__(name)
def __init__(self, in_c, t, c, n, s):
super(InvresiBlocks, self).__init__()
self._first_block = InvertedResidualUnit(
name=name + "_1",
num_channels=in_c,
num_in_filter=in_c,
num_filters=c,
stride=s,
......@@ -137,14 +137,14 @@ class InvresiBlocks(fluid.dygraph.Layer):
for i in range(1, n):
tmp = self.add_sublayer(
sublayer=InvertedResidualUnit(
name=name + "_" + str(i + 1),
num_channels=c,
num_in_filter=c,
num_filters=c,
stride=1,
filter_size=3,
padding=1,
expansion_factor=t),
name=name + "_" + str(i + 1))
name=self.full_name() + "_" + str(i + 1))
self._inv_blocks.append(tmp)
def forward(self, inputs):
......@@ -155,8 +155,8 @@ class InvresiBlocks(fluid.dygraph.Layer):
class MobileNetV2(fluid.dygraph.Layer):
def __init__(self, name, class_dim=1000, scale=1.0):
super(MobileNetV2, self).__init__(name)
def __init__(self, class_dim=1000, scale=1.0):
super(MobileNetV2, self).__init__()
self.scale = scale
self.class_dim = class_dim
......@@ -172,7 +172,7 @@ class MobileNetV2(fluid.dygraph.Layer):
#1. conv1
self._conv1 = ConvBNLayer(
name="conv1_1",
num_channels=3,
num_filters=int(32 * scale),
filter_size=3,
stride=2,
......@@ -187,34 +187,30 @@ class MobileNetV2(fluid.dygraph.Layer):
i += 1
tmp = self.add_sublayer(
sublayer=InvresiBlocks(
name='conv' + str(i),
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s),
in_c=in_c, t=t, c=int(c * scale), n=n, s=s),
name='conv' + str(i))
self._invl.append(tmp)
in_c = int(c * scale)
#3. last_conv
self._out_c = int(1280 * scale) if scale > 1.0 else 1280
self._conv9 = ConvBNLayer(
name="conv9",
num_filters=int(1280 * scale) if scale > 1.0 else 1280,
num_channels=in_c,
num_filters=self._out_c,
filter_size=1,
stride=1,
padding=0)
#4. pool
self._pool2d_avg = Pool2D(
name_scope="pool", pool_type='avg', global_pooling=True)
self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
#5. fc
tmp_param = ParamAttr(name="fc10_weights")
self._fc = FC(name_scope="fc",
size=class_dim,
param_attr=tmp_param,
bias_attr=ParamAttr(name="fc10_offset"))
tmp_param = ParamAttr(name=self.full_name() + "fc10_weights")
self._fc = Linear(
self._out_c,
class_dim,
param_attr=tmp_param,
bias_attr=ParamAttr(name="fc10_offset"))
def forward(self, inputs):
y = self._conv1(inputs, if_act=True)
......@@ -222,5 +218,6 @@ class MobileNetV2(fluid.dygraph.Layer):
y = inv(y)
y = self._conv9(y, if_act=True)
y = self._pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, self._out_c])
y = self._fc(y)
return y
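
The common thread in both models is the move from FC, which flattened its input and inferred the input width lazily at the first call, to Linear, which takes input and output dimensions at construction; that is why both forward passes now reshape the pooled NCHW tensor to 2-D before the classifier. A small runnable sketch of that pattern, assuming the scale=1.0 output width of 1280 used by MobileNetV2:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Linear

# Hedged sketch: Linear expects a 2-D input whose last dimension matches its
# declared input size, so the globally pooled feature map is reshaped first.
with fluid.dygraph.guard():
    out_c, class_dim = 1280, 1000   # mirrors the scale=1.0 case in MobileNetV2
    fc = Linear(out_c, class_dim)
    pooled = fluid.dygraph.to_variable(
        np.random.random((4, out_c, 1, 1)).astype('float32'))  # N, C, 1, 1 after global pooling
    flat = fluid.layers.reshape(pooled, shape=[-1, out_c])
    logits = fc(flat)               # shape: [4, 1000]
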
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1
python3 -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1
export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV2
python3 -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV2
export CUDA_VISIBLE_DEVICES=0
python train.py --batch_size=256 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1
python3 train.py --batch_size=256 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1
export CUDA_VISIBLE_DEVICES=0
python train.py --batch_size=128 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --model=MobileNetV2
python3 train.py --batch_size=128 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --model=MobileNetV2
......@@ -27,7 +27,7 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
#from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
......@@ -95,11 +95,10 @@ def train_mobilenet():
net = None
if args.model == "MobileNetV1":
net = MobileNetV1("mobilenet_v1", class_dim=args.class_dim)
net = MobileNetV1(class_dim=args.class_dim)
para_name = 'mobilenet_v1_params'
elif args.model == "MobileNetV2":
net = MobileNetV2(
name="mobilenet_v2", class_dim=args.class_dim, scale=1.0)
net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
para_name = 'mobilenet_v2_params'
else:
print(
......@@ -107,7 +106,7 @@ def train_mobilenet():
)
exit()
optimizer = create_optimizer(args)
optimizer = create_optimizer(args=args, parameter_list=net.parameters())
if args.use_data_parallel:
net = fluid.dygraph.parallel.DataParallel(net, strategy)
train_data_loader, train_data = utility.create_data_loader(
......
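
Taken together, the train.py changes build the network without a name argument and hand the optimizer the network's parameters explicitly, which Paddle 1.7 dygraph optimizers require. A rough sketch of how the changed lines sit in train_mobilenet(); place and strategy are the existing variables from the file and only the lines shown in the diff above are authoritative:

# Hedged sketch of the updated setup in train_mobilenet().
with fluid.dygraph.guard(place):
    if args.model == "MobileNetV1":
        net = MobileNetV1(class_dim=args.class_dim)
    elif args.model == "MobileNetV2":
        net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
    optimizer = create_optimizer(args=args, parameter_list=net.parameters())
    if args.use_data_parallel:
        net = fluid.dygraph.parallel.DataParallel(net, strategy)
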
......@@ -148,7 +148,8 @@ class Optimizer(object):
"""
def __init__(self, args):
def __init__(self, args, parameter_list):
self.parameter_list = parameter_list
self.batch_size = args.batch_size
self.lr = args.lr
self.lr_strategy = args.lr_strategy
......@@ -175,7 +176,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer
def cosine_decay(self):
......@@ -192,7 +194,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer
def cosine_decay_warmup(self):
......@@ -209,7 +212,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer
def exponential_decay_warmup(self):
......@@ -230,7 +234,8 @@ class Optimizer(object):
regularization=fluid.regularizer.L2Decay(self.l2_decay),
momentum=self.momentum_rate,
rho=0.9,
epsilon=0.001)
epsilon=0.001,
parameter_list=self.parameter_list)
return optimizer
def linear_decay(self):
......@@ -246,7 +251,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer
......@@ -257,7 +263,8 @@ class Optimizer(object):
an adam_decay optimizer
"""
return fluid.optimizer.Adam(learning_rate=self.lr)
return fluid.optimizer.Adam(
learning_rate=self.lr, parameter_list=self.parameter_list)
def cosine_decay_RMSProp(self):
"""cosine decay with RMSProp optimizer
......@@ -275,7 +282,8 @@ class Optimizer(object):
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay),
# Apply epsilon=1 on ImageNet dataset.
epsilon=1)
epsilon=1,
parameter_list=self.parameter_list)
return optimizer
def default_decay(self):
......@@ -288,12 +296,13 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum(
learning_rate=self.lr,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer
def create_optimizer(args):
Opt = Optimizer(args)
def create_optimizer(args, parameter_list):
Opt = Optimizer(args, parameter_list)
optimizer = getattr(Opt, args.lr_strategy)()
return optimizer
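
Every branch of the Optimizer class now threads parameter_list through to the underlying fluid optimizer because, in dygraph mode, Paddle 1.7 optimizers must know at construction which parameters they update. A minimal end-to-end sketch of that pattern; the data and loss computation are placeholders, not part of this commit:

import numpy as np
import paddle.fluid as fluid

# Hedged sketch: build a dygraph optimizer over net.parameters() and take one step.
with fluid.dygraph.guard():
    net = MobileNetV1(class_dim=1000)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=0.1,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(3e-5),
        parameter_list=net.parameters())
    img = fluid.dygraph.to_variable(
        np.random.random((4, 3, 224, 224)).astype('float32'))
    label = fluid.dygraph.to_variable(np.zeros((4, 1), dtype='int64'))
    loss = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(net(img), label))
    loss.backward()
    optimizer.minimize(loss)
    net.clear_gradients()
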