提交 80cbdf27 编写于 作者: C chajchaj 提交者: ruri

up to paddle 1.7 (#4216)

上级 c98dbafd
...@@ -23,7 +23,7 @@ import paddle.fluid as fluid ...@@ -23,7 +23,7 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework from paddle.fluid import framework
import math import math
...@@ -32,7 +32,7 @@ import sys ...@@ -32,7 +32,7 @@ import sys
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, num_channels,
filter_size, filter_size,
num_filters, num_filters,
stride, stride,
...@@ -42,10 +42,10 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -42,10 +42,10 @@ class ConvBNLayer(fluid.dygraph.Layer):
act='relu', act='relu',
use_cudnn=True, use_cudnn=True,
name=None): name=None):
super(ConvBNLayer, self).__init__(name_scope) super(ConvBNLayer, self).__init__()
self._conv = Conv2D( self._conv = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
...@@ -58,13 +58,12 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -58,13 +58,12 @@ class ConvBNLayer(fluid.dygraph.Layer):
bias_attr=False) bias_attr=False)
self._batch_norm = BatchNorm( self._batch_norm = BatchNorm(
self.full_name(),
num_filters, num_filters,
act=act, act=act,
param_attr=ParamAttr(name="_bn" + "_scale"), param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"),
bias_attr=ParamAttr(name="_bn" + "_offset"), bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"),
moving_mean_name="_bn" + '_mean', moving_mean_name=self.full_name() + "_bn" + '_mean',
moving_variance_name="_bn" + '_variance') moving_variance_name=self.full_name() + "_bn" + '_variance')
def forward(self, inputs): def forward(self, inputs):
y = self._conv(inputs) y = self._conv(inputs)
...@@ -74,17 +73,17 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -74,17 +73,17 @@ class ConvBNLayer(fluid.dygraph.Layer):
class DepthwiseSeparable(fluid.dygraph.Layer): class DepthwiseSeparable(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, num_channels,
num_filters1, num_filters1,
num_filters2, num_filters2,
num_groups, num_groups,
stride, stride,
scale, scale,
name=None): name=None):
super(DepthwiseSeparable, self).__init__(name_scope) super(DepthwiseSeparable, self).__init__()
self._depthwise_conv = ConvBNLayer( self._depthwise_conv = ConvBNLayer(
name_scope="dw", num_channels=num_channels,
num_filters=int(num_filters1 * scale), num_filters=int(num_filters1 * scale),
filter_size=3, filter_size=3,
stride=stride, stride=stride,
...@@ -93,7 +92,7 @@ class DepthwiseSeparable(fluid.dygraph.Layer): ...@@ -93,7 +92,7 @@ class DepthwiseSeparable(fluid.dygraph.Layer):
use_cudnn=False) use_cudnn=False)
self._pointwise_conv = ConvBNLayer( self._pointwise_conv = ConvBNLayer(
name_scope="sep", num_channels=int(num_filters1 * scale),
filter_size=1, filter_size=1,
num_filters=int(num_filters2 * scale), num_filters=int(num_filters2 * scale),
stride=1, stride=1,
...@@ -106,13 +105,13 @@ class DepthwiseSeparable(fluid.dygraph.Layer): ...@@ -106,13 +105,13 @@ class DepthwiseSeparable(fluid.dygraph.Layer):
class MobileNetV1(fluid.dygraph.Layer): class MobileNetV1(fluid.dygraph.Layer):
def __init__(self, name_scope, scale=1.0, class_dim=102): def __init__(self, scale=1.0, class_dim=1000):
super(MobileNetV1, self).__init__(name_scope) super(MobileNetV1, self).__init__()
self.scale = scale self.scale = scale
self.dwsl = [] self.dwsl = []
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
name_scope="conv1", num_channels=3,
filter_size=3, filter_size=3,
channels=3, channels=3,
num_filters=int(32 * scale), num_filters=int(32 * scale),
...@@ -121,7 +120,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -121,7 +120,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws21 = self.add_sublayer( dws21 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv2_1", num_channels=int(32 * scale),
num_filters1=32, num_filters1=32,
num_filters2=64, num_filters2=64,
num_groups=32, num_groups=32,
...@@ -132,7 +131,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -132,7 +131,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws22 = self.add_sublayer( dws22 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv2_2", num_channels=int(64 * scale),
num_filters1=64, num_filters1=64,
num_filters2=128, num_filters2=128,
num_groups=64, num_groups=64,
...@@ -143,7 +142,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -143,7 +142,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws31 = self.add_sublayer( dws31 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv3_1", num_channels=int(128 * scale),
num_filters1=128, num_filters1=128,
num_filters2=128, num_filters2=128,
num_groups=128, num_groups=128,
...@@ -154,7 +153,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -154,7 +153,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws32 = self.add_sublayer( dws32 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv3_2", num_channels=int(128 * scale),
num_filters1=128, num_filters1=128,
num_filters2=256, num_filters2=256,
num_groups=128, num_groups=128,
...@@ -165,7 +164,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -165,7 +164,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws41 = self.add_sublayer( dws41 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv4_1", num_channels=int(256 * scale),
num_filters1=256, num_filters1=256,
num_filters2=256, num_filters2=256,
num_groups=256, num_groups=256,
...@@ -176,7 +175,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -176,7 +175,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws42 = self.add_sublayer( dws42 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv4_2", num_channels=int(256 * scale),
num_filters1=256, num_filters1=256,
num_filters2=512, num_filters2=512,
num_groups=256, num_groups=256,
...@@ -188,7 +187,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -188,7 +187,7 @@ class MobileNetV1(fluid.dygraph.Layer):
for i in range(5): for i in range(5):
tmp = self.add_sublayer( tmp = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv5_" + str(i + 1), num_channels=int(512 * scale),
num_filters1=512, num_filters1=512,
num_filters2=512, num_filters2=512,
num_groups=512, num_groups=512,
...@@ -199,7 +198,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -199,7 +198,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws56 = self.add_sublayer( dws56 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv5_6", num_channels=int(512 * scale),
num_filters1=512, num_filters1=512,
num_filters2=1024, num_filters2=1024,
num_groups=512, num_groups=512,
...@@ -210,7 +209,7 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -210,7 +209,7 @@ class MobileNetV1(fluid.dygraph.Layer):
dws6 = self.add_sublayer( dws6 = self.add_sublayer(
sublayer=DepthwiseSeparable( sublayer=DepthwiseSeparable(
name_scope="conv6", num_channels=int(1024 * scale),
num_filters1=1024, num_filters1=1024,
num_filters2=1024, num_filters2=1024,
num_groups=1024, num_groups=1024,
...@@ -219,21 +218,20 @@ class MobileNetV1(fluid.dygraph.Layer): ...@@ -219,21 +218,20 @@ class MobileNetV1(fluid.dygraph.Layer):
name="conv6") name="conv6")
self.dwsl.append(dws6) self.dwsl.append(dws6)
self.pool2d_avg = Pool2D( self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
name_scope="pool", pool_type='avg', global_pooling=True)
self.out = FC(name_scope="fc", self.out = Linear(
size=class_dim, int(1024 * scale),
param_attr=ParamAttr( class_dim,
initializer=MSRA(), param_attr=ParamAttr(
name=self.full_name() + "fc7_weights"), initializer=MSRA(), name=self.full_name() + "fc7_weights"),
bias_attr=ParamAttr(name="fc7_offset")) bias_attr=ParamAttr(name="fc7_offset"))
def forward(self, inputs): def forward(self, inputs):
y = self.conv1(inputs) y = self.conv1(inputs)
idx = 0
for dws in self.dwsl: for dws in self.dwsl:
y = dws(y) y = dws(y)
y = self.pool2d_avg(y) y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, 1024])
y = self.out(y) y = self.out(y)
return y return y
...@@ -25,7 +25,7 @@ import paddle.fluid as fluid ...@@ -25,7 +25,7 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework from paddle.fluid import framework
...@@ -36,19 +36,19 @@ import sys ...@@ -36,19 +36,19 @@ import sys
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
num_channels,
filter_size, filter_size,
num_filters, num_filters,
stride, stride,
padding, padding,
channels=None, channels=None,
num_groups=1, num_groups=1,
name=None,
use_cudnn=True): use_cudnn=True):
super(ConvBNLayer, self).__init__(name) super(ConvBNLayer, self).__init__()
tmp_param = ParamAttr(name=name + "_weights") tmp_param = ParamAttr(name=self.full_name() + "_weights")
self._conv = Conv2D( self._conv = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
...@@ -60,12 +60,11 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -60,12 +60,11 @@ class ConvBNLayer(fluid.dygraph.Layer):
bias_attr=False) bias_attr=False)
self._batch_norm = BatchNorm( self._batch_norm = BatchNorm(
self.full_name(),
num_filters, num_filters,
param_attr=ParamAttr(name=name + "_bn" + "_scale"), param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"),
bias_attr=ParamAttr(name=name + "_bn" + "_offset"), bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"),
moving_mean_name=name + "_bn" + '_mean', moving_mean_name=self.full_name() + "_bn" + '_mean',
moving_variance_name=name + "_bn" + '_variance') moving_variance_name=self.full_name() + "_bn" + '_variance')
def forward(self, inputs, if_act=True): def forward(self, inputs, if_act=True):
y = self._conv(inputs) y = self._conv(inputs)
...@@ -76,18 +75,19 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -76,18 +75,19 @@ class ConvBNLayer(fluid.dygraph.Layer):
class InvertedResidualUnit(fluid.dygraph.Layer): class InvertedResidualUnit(fluid.dygraph.Layer):
def __init__(self, def __init__(
num_in_filter, self,
num_filters, num_channels,
stride, num_in_filter,
filter_size, num_filters,
padding, stride,
expansion_factor, filter_size,
name=None): padding,
super(InvertedResidualUnit, self).__init__(name) expansion_factor, ):
super(InvertedResidualUnit, self).__init__()
num_expfilter = int(round(num_in_filter * expansion_factor)) num_expfilter = int(round(num_in_filter * expansion_factor))
self._expand_conv = ConvBNLayer( self._expand_conv = ConvBNLayer(
name=name + "_expand", num_channels=num_channels,
num_filters=num_expfilter, num_filters=num_expfilter,
filter_size=1, filter_size=1,
stride=1, stride=1,
...@@ -95,7 +95,7 @@ class InvertedResidualUnit(fluid.dygraph.Layer): ...@@ -95,7 +95,7 @@ class InvertedResidualUnit(fluid.dygraph.Layer):
num_groups=1) num_groups=1)
self._bottleneck_conv = ConvBNLayer( self._bottleneck_conv = ConvBNLayer(
name=name + "_dwise", num_channels=num_expfilter,
num_filters=num_expfilter, num_filters=num_expfilter,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
...@@ -104,7 +104,7 @@ class InvertedResidualUnit(fluid.dygraph.Layer): ...@@ -104,7 +104,7 @@ class InvertedResidualUnit(fluid.dygraph.Layer):
use_cudnn=False) use_cudnn=False)
self._linear_conv = ConvBNLayer( self._linear_conv = ConvBNLayer(
name=name + "_linear", num_channels=num_expfilter,
num_filters=num_filters, num_filters=num_filters,
filter_size=1, filter_size=1,
stride=1, stride=1,
...@@ -121,11 +121,11 @@ class InvertedResidualUnit(fluid.dygraph.Layer): ...@@ -121,11 +121,11 @@ class InvertedResidualUnit(fluid.dygraph.Layer):
class InvresiBlocks(fluid.dygraph.Layer): class InvresiBlocks(fluid.dygraph.Layer):
def __init__(self, in_c, t, c, n, s, name=None): def __init__(self, in_c, t, c, n, s):
super(InvresiBlocks, self).__init__(name) super(InvresiBlocks, self).__init__()
self._first_block = InvertedResidualUnit( self._first_block = InvertedResidualUnit(
name=name + "_1", num_channels=in_c,
num_in_filter=in_c, num_in_filter=in_c,
num_filters=c, num_filters=c,
stride=s, stride=s,
...@@ -137,14 +137,14 @@ class InvresiBlocks(fluid.dygraph.Layer): ...@@ -137,14 +137,14 @@ class InvresiBlocks(fluid.dygraph.Layer):
for i in range(1, n): for i in range(1, n):
tmp = self.add_sublayer( tmp = self.add_sublayer(
sublayer=InvertedResidualUnit( sublayer=InvertedResidualUnit(
name=name + "_" + str(i + 1), num_channels=c,
num_in_filter=c, num_in_filter=c,
num_filters=c, num_filters=c,
stride=1, stride=1,
filter_size=3, filter_size=3,
padding=1, padding=1,
expansion_factor=t), expansion_factor=t),
name=name + "_" + str(i + 1)) name=self.full_name() + "_" + str(i + 1))
self._inv_blocks.append(tmp) self._inv_blocks.append(tmp)
def forward(self, inputs): def forward(self, inputs):
...@@ -155,8 +155,8 @@ class InvresiBlocks(fluid.dygraph.Layer): ...@@ -155,8 +155,8 @@ class InvresiBlocks(fluid.dygraph.Layer):
class MobileNetV2(fluid.dygraph.Layer): class MobileNetV2(fluid.dygraph.Layer):
def __init__(self, name, class_dim=1000, scale=1.0): def __init__(self, class_dim=1000, scale=1.0):
super(MobileNetV2, self).__init__(name) super(MobileNetV2, self).__init__()
self.scale = scale self.scale = scale
self.class_dim = class_dim self.class_dim = class_dim
...@@ -172,7 +172,7 @@ class MobileNetV2(fluid.dygraph.Layer): ...@@ -172,7 +172,7 @@ class MobileNetV2(fluid.dygraph.Layer):
#1. conv1 #1. conv1
self._conv1 = ConvBNLayer( self._conv1 = ConvBNLayer(
name="conv1_1", num_channels=3,
num_filters=int(32 * scale), num_filters=int(32 * scale),
filter_size=3, filter_size=3,
stride=2, stride=2,
...@@ -187,34 +187,30 @@ class MobileNetV2(fluid.dygraph.Layer): ...@@ -187,34 +187,30 @@ class MobileNetV2(fluid.dygraph.Layer):
i += 1 i += 1
tmp = self.add_sublayer( tmp = self.add_sublayer(
sublayer=InvresiBlocks( sublayer=InvresiBlocks(
name='conv' + str(i), in_c=in_c, t=t, c=int(c * scale), n=n, s=s),
in_c=in_c,
t=t,
c=int(c * scale),
n=n,
s=s),
name='conv' + str(i)) name='conv' + str(i))
self._invl.append(tmp) self._invl.append(tmp)
in_c = int(c * scale) in_c = int(c * scale)
#3. last_conv #3. last_conv
self._out_c = int(1280 * scale) if scale > 1.0 else 1280
self._conv9 = ConvBNLayer( self._conv9 = ConvBNLayer(
name="conv9", num_channels=in_c,
num_filters=int(1280 * scale) if scale > 1.0 else 1280, num_filters=self._out_c,
filter_size=1, filter_size=1,
stride=1, stride=1,
padding=0) padding=0)
#4. pool #4. pool
self._pool2d_avg = Pool2D( self._pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
name_scope="pool", pool_type='avg', global_pooling=True)
#5. fc #5. fc
tmp_param = ParamAttr(name="fc10_weights") tmp_param = ParamAttr(name=self.full_name() + "fc10_weights")
self._fc = FC(name_scope="fc", self._fc = Linear(
size=class_dim, self._out_c,
param_attr=tmp_param, class_dim,
bias_attr=ParamAttr(name="fc10_offset")) param_attr=tmp_param,
bias_attr=ParamAttr(name="fc10_offset"))
def forward(self, inputs): def forward(self, inputs):
y = self._conv1(inputs, if_act=True) y = self._conv1(inputs, if_act=True)
...@@ -222,5 +218,6 @@ class MobileNetV2(fluid.dygraph.Layer): ...@@ -222,5 +218,6 @@ class MobileNetV2(fluid.dygraph.Layer):
y = inv(y) y = inv(y)
y = self._conv9(y, if_act=True) y = self._conv9(y, if_act=True)
y = self._pool2d_avg(y) y = self._pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, self._out_c])
y = self._fc(y) y = self._fc(y)
return y return y
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1 python3 -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3
python -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV2 python3 -m paddle.distributed.launch --log_dir ./mylog.time train.py --use_data_parallel 1 --batch_size=256 --reader_thread=8 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV2
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
python train.py --batch_size=256 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1 python3 train.py --batch_size=256 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --l2_decay=3e-5 --model=MobileNetV1
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
python train.py --batch_size=128 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --model=MobileNetV2 python3 train.py --batch_size=128 --total_images=1281167 --class_dim=1000 --image_shape=3,224,224 --model_save_dir=output/ --lr_strategy=piecewise_decay --lr=0.1 --data_dir=../../PaddleCV/image_classification/data/ILSVRC2012 --model=MobileNetV2
...@@ -27,7 +27,7 @@ import paddle.fluid as fluid ...@@ -27,7 +27,7 @@ import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC #from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework from paddle.fluid import framework
...@@ -95,11 +95,10 @@ def train_mobilenet(): ...@@ -95,11 +95,10 @@ def train_mobilenet():
net = None net = None
if args.model == "MobileNetV1": if args.model == "MobileNetV1":
net = MobileNetV1("mobilenet_v1", class_dim=args.class_dim) net = MobileNetV1(class_dim=args.class_dim)
para_name = 'mobilenet_v1_params' para_name = 'mobilenet_v1_params'
elif args.model == "MobileNetV2": elif args.model == "MobileNetV2":
net = MobileNetV2( net = MobileNetV2(class_dim=args.class_dim, scale=1.0)
name="mobilenet_v2", class_dim=args.class_dim, scale=1.0)
para_name = 'mobilenet_v2_params' para_name = 'mobilenet_v2_params'
else: else:
print( print(
...@@ -107,7 +106,7 @@ def train_mobilenet(): ...@@ -107,7 +106,7 @@ def train_mobilenet():
) )
exit() exit()
optimizer = create_optimizer(args) optimizer = create_optimizer(args=args, parameter_list=net.parameters())
if args.use_data_parallel: if args.use_data_parallel:
net = fluid.dygraph.parallel.DataParallel(net, strategy) net = fluid.dygraph.parallel.DataParallel(net, strategy)
train_data_loader, train_data = utility.create_data_loader( train_data_loader, train_data = utility.create_data_loader(
......
...@@ -148,7 +148,8 @@ class Optimizer(object): ...@@ -148,7 +148,8 @@ class Optimizer(object):
""" """
def __init__(self, args): def __init__(self, args, parameter_list):
self.parameter_list = parameter_list
self.batch_size = args.batch_size self.batch_size = args.batch_size
self.lr = args.lr self.lr = args.lr
self.lr_strategy = args.lr_strategy self.lr_strategy = args.lr_strategy
...@@ -175,7 +176,8 @@ class Optimizer(object): ...@@ -175,7 +176,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate, learning_rate=learning_rate,
momentum=self.momentum_rate, momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay)) regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer return optimizer
def cosine_decay(self): def cosine_decay(self):
...@@ -192,7 +194,8 @@ class Optimizer(object): ...@@ -192,7 +194,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate, learning_rate=learning_rate,
momentum=self.momentum_rate, momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay)) regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer return optimizer
def cosine_decay_warmup(self): def cosine_decay_warmup(self):
...@@ -209,7 +212,8 @@ class Optimizer(object): ...@@ -209,7 +212,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate, learning_rate=learning_rate,
momentum=self.momentum_rate, momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay)) regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer return optimizer
def exponential_decay_warmup(self): def exponential_decay_warmup(self):
...@@ -230,7 +234,8 @@ class Optimizer(object): ...@@ -230,7 +234,8 @@ class Optimizer(object):
regularization=fluid.regularizer.L2Decay(self.l2_decay), regularization=fluid.regularizer.L2Decay(self.l2_decay),
momentum=self.momentum_rate, momentum=self.momentum_rate,
rho=0.9, rho=0.9,
epsilon=0.001) epsilon=0.001,
parameter_list=self.parameter_list)
return optimizer return optimizer
def linear_decay(self): def linear_decay(self):
...@@ -246,7 +251,8 @@ class Optimizer(object): ...@@ -246,7 +251,8 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate, learning_rate=learning_rate,
momentum=self.momentum_rate, momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay)) regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer return optimizer
...@@ -257,7 +263,8 @@ class Optimizer(object): ...@@ -257,7 +263,8 @@ class Optimizer(object):
an adam_decay optimizer an adam_decay optimizer
""" """
return fluid.optimizer.Adam(learning_rate=self.lr) return fluid.optimizer.Adam(
learning_rate=self.lr, parameter_list=self.parameter_list)
def cosine_decay_RMSProp(self): def cosine_decay_RMSProp(self):
"""cosine decay with RMSProp optimizer """cosine decay with RMSProp optimizer
...@@ -275,7 +282,8 @@ class Optimizer(object): ...@@ -275,7 +282,8 @@ class Optimizer(object):
momentum=self.momentum_rate, momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay), regularization=fluid.regularizer.L2Decay(self.l2_decay),
# Apply epsilon=1 on ImageNet dataset. # Apply epsilon=1 on ImageNet dataset.
epsilon=1) epsilon=1,
parameter_list=self.parameter_list)
return optimizer return optimizer
def default_decay(self): def default_decay(self):
...@@ -288,12 +296,13 @@ class Optimizer(object): ...@@ -288,12 +296,13 @@ class Optimizer(object):
optimizer = fluid.optimizer.Momentum( optimizer = fluid.optimizer.Momentum(
learning_rate=self.lr, learning_rate=self.lr,
momentum=self.momentum_rate, momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay)) regularization=fluid.regularizer.L2Decay(self.l2_decay),
parameter_list=self.parameter_list)
return optimizer return optimizer
def create_optimizer(args): def create_optimizer(args, parameter_list):
Opt = Optimizer(args) Opt = Optimizer(args, parameter_list)
optimizer = getattr(Opt, args.lr_strategy)() optimizer = getattr(Opt, args.lr_strategy)()
return optimizer return optimizer
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册