提交 c35c10a8 编写于 作者: Z zhongpu 提交者: hong

fix resnet model for dygraph incompatible upgrade, test=develop (#4114)

上级 1d50478e
...@@ -18,7 +18,7 @@ import ast ...@@ -18,7 +18,7 @@ import ast
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, FC from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework from paddle.fluid import framework
...@@ -53,7 +53,7 @@ args = parse_args() ...@@ -53,7 +53,7 @@ args = parse_args()
batch_size = args.batch_size batch_size = args.batch_size
def optimizer_setting(): def optimizer_setting(parameter_list=None):
total_images = IMAGENET1000 total_images = IMAGENET1000
...@@ -64,28 +64,36 @@ def optimizer_setting(): ...@@ -64,28 +64,36 @@ def optimizer_setting():
lr = [] lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum( if fluid.in_dygraph_mode():
learning_rate=fluid.layers.piecewise_decay( optimizer = fluid.optimizer.Momentum(
boundaries=bd, values=lr), learning_rate=fluid.layers.piecewise_decay(
momentum=momentum_rate, boundaries=bd, values=lr),
regularization=fluid.regularizer.L2Decay(l2_decay)) momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay),
parameter_list=parameter_list)
else:
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=momentum_rate,
regularization=fluid.regularizer.L2Decay(l2_decay))
return optimizer return optimizer
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope,
num_channels, num_channels,
num_filters, num_filters,
filter_size, filter_size,
stride=1, stride=1,
groups=1, groups=1,
act=None): act=None):
super(ConvBNLayer, self).__init__(name_scope) super(ConvBNLayer, self).__init__()
self._conv = Conv2D( self._conv = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
...@@ -94,7 +102,7 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -94,7 +102,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
act=None, act=None,
bias_attr=False) bias_attr=False)
self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act) self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs): def forward(self, inputs):
y = self._conv(inputs) y = self._conv(inputs)
...@@ -105,28 +113,24 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -105,28 +113,24 @@ class ConvBNLayer(fluid.dygraph.Layer):
class BottleneckBlock(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope,
num_channels, num_channels,
num_filters, num_filters,
stride, stride,
shortcut=True): shortcut=True):
super(BottleneckBlock, self).__init__(name_scope) super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=num_channels, num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=1, filter_size=1,
act='relu') act='relu')
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=num_filters, num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
filter_size=3, filter_size=3,
stride=stride, stride=stride,
act='relu') act='relu')
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=num_filters, num_channels=num_filters,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
...@@ -134,7 +138,6 @@ class BottleneckBlock(fluid.dygraph.Layer): ...@@ -134,7 +138,6 @@ class BottleneckBlock(fluid.dygraph.Layer):
if not shortcut: if not shortcut:
self.short = ConvBNLayer( self.short = ConvBNLayer(
self.full_name(),
num_channels=num_channels, num_channels=num_channels,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
...@@ -161,8 +164,8 @@ class BottleneckBlock(fluid.dygraph.Layer): ...@@ -161,8 +164,8 @@ class BottleneckBlock(fluid.dygraph.Layer):
class ResNet(fluid.dygraph.Layer): class ResNet(fluid.dygraph.Layer):
def __init__(self, name_scope, layers=50, class_dim=102): def __init__(self, layers=50, class_dim=102):
super(ResNet, self).__init__(name_scope) super(ResNet, self).__init__()
self.layers = layers self.layers = layers
supported_layers = [50, 101, 152] supported_layers = [50, 101, 152]
...@@ -175,47 +178,46 @@ class ResNet(fluid.dygraph.Layer): ...@@ -175,47 +178,46 @@ class ResNet(fluid.dygraph.Layer):
depth = [3, 4, 23, 3] depth = [3, 4, 23, 3]
elif layers == 152: elif layers == 152:
depth = [3, 8, 36, 3] depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [64, 128, 256, 512] num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer( self.conv = ConvBNLayer(
self.full_name(),
num_channels=3, num_channels=3,
num_filters=64, num_filters=64,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.pool2d_max = Pool2D( self.pool2d_max = Pool2D(
self.full_name(),
pool_size=3, pool_size=3,
pool_stride=2, pool_stride=2,
pool_padding=1, pool_padding=1,
pool_type='max') pool_type='max')
self.bottleneck_block_list = [] self.bottleneck_block_list = []
num_channels = 64
for block in range(len(depth)): for block in range(len(depth)):
shortcut = False shortcut = False
for i in range(depth[block]): for i in range(depth[block]):
bottleneck_block = self.add_sublayer( bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i), 'bb_%d_%d' % (block, i),
BottleneckBlock( BottleneckBlock(
self.full_name(), num_channels=num_channels[block]
num_channels=num_channels, if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block], num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1, stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut)) shortcut=shortcut))
num_channels = bottleneck_block._num_channels_out
self.bottleneck_block_list.append(bottleneck_block) self.bottleneck_block_list.append(bottleneck_block)
shortcut = True shortcut = True
self.pool2d_avg = Pool2D( self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True) pool_size=7, pool_type='avg', global_pooling=True)
self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1
import math import math
stdv = 1.0 / math.sqrt(2048 * 1.0) stdv = 1.0 / math.sqrt(2048 * 1.0)
self.out = FC(self.full_name(), self.out = Linear(self.pool2d_avg_output,
size=class_dim, class_dim,
act='softmax', act='softmax',
param_attr=fluid.param_attr.ParamAttr( param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv))) initializer=fluid.initializer.Uniform(-stdv, stdv)))
...@@ -226,6 +228,7 @@ class ResNet(fluid.dygraph.Layer): ...@@ -226,6 +228,7 @@ class ResNet(fluid.dygraph.Layer):
for bottleneck_block in self.bottleneck_block_list: for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y) y = bottleneck_block(y)
y = self.pool2d_avg(y) y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
y = self.out(y) y = self.out(y)
return y return y
...@@ -265,16 +268,13 @@ def eval(model, data): ...@@ -265,16 +268,13 @@ def eval(model, data):
# print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out)) # print("epoch id: %d, batch step: %d, loss: %f" % (eop, batch_id, dy_out))
if batch_id % 10 == 0: if batch_id % 10 == 0:
print("test | batch step %d, loss %0.3f acc1 %0.3f acc5 %0.3f" % \ print("test | batch step %d, acc1 %0.3f acc5 %0.3f" % \
( batch_id, total_loss / total_sample, \ ( batch_id, total_acc1 / total_sample, total_acc5 / total_sample))
total_acc1 / total_sample, total_acc5 / total_sample))
if args.ce: if args.ce:
print("kpis\ttest_acc1\t%0.3f" % (total_acc1 / total_sample)) print("kpis\ttest_acc1\t%0.3f" % (total_acc1 / total_sample))
print("kpis\ttest_acc5\t%0.3f" % (total_acc5 / total_sample)) print("kpis\ttest_acc5\t%0.3f" % (total_acc5 / total_sample))
print("kpis\ttest_loss\t%0.3f" % (total_loss / total_sample)) print("final eval acc1 %0.3f acc5 %0.3f" % \
print("final eval loss %0.3f acc1 %0.3f acc5 %0.3f" % \ (total_acc1 / total_sample, total_acc5 / total_sample))
(total_loss / total_sample, \
total_acc1 / total_sample, total_acc5 / total_sample))
def train_resnet(): def train_resnet():
...@@ -292,8 +292,8 @@ def train_resnet(): ...@@ -292,8 +292,8 @@ def train_resnet():
if args.use_data_parallel: if args.use_data_parallel:
strategy = fluid.dygraph.parallel.prepare_context() strategy = fluid.dygraph.parallel.prepare_context()
resnet = ResNet("resnet") resnet = ResNet()
optimizer = optimizer_setting() optimizer = optimizer_setting(parameter_list=resnet.parameters())
if args.use_data_parallel: if args.use_data_parallel:
resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy) resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册