Commit 91d87ec0 authored by M minqiyang

Add unittest for imperative ResNet

Fix a bug in the static-graph BatchNorm layer
Parent e33427da
......@@ -27,6 +27,7 @@ __all__ = [
'Conv2D',
'Pool2D',
'FC',
'BatchNorm',
]
......@@ -209,14 +210,24 @@ class FC(layers.Layer):
def __init__(self,
size,
param_attr=None,
                 bias_attr=None,
                 num_flatten_dims=1,
                 dtype=core.VarDesc.VarType.FP32,
                 act=None,
                 is_test=False,
                 name=None):
super(FC, self).__init__()
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
from ..layer_helper import LayerHelper
        self._helper = LayerHelper(
            'FC',
            param_attr=param_attr,
            bias_attr=bias_attr,
            act=act,
            name=name)
def _build_once(self, input):
input_shape = input.shape
......@@ -247,4 +258,132 @@ class FC(layers.Layer):
inputs={"X": [tmp]},
outputs={"Out": out},
attrs={"use_mkldnn": False})
        # apply bias and activation through the layer helper
        pre_bias = out
        pre_activation = self._helper.append_bias_op(
            pre_bias, dim_start=self._num_flatten_dims)
        return self._helper.append_activation(pre_activation)
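As a quick illustration (not part of this commit), the extended FC layer can now apply its own bias and activation in imperative mode; the sizes below are arbitrary assumptions:

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    x = to_variable(np.random.rand(8, 32).astype('float32'))
    # bias_attr defaults to a trainable bias; act fuses the activation.
    fc = FC(size=10, act='softmax')
    y = fc(x)
    print(y._numpy().shape)  # expected: (8, 10)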
class BatchNorm(layers.Layer):
def __init__(self,
num_channels,
act=None,
is_test=False,
momentum=0.9,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
dtype=core.VarDesc.VarType.FP32,
data_layout='NCHW',
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None,
do_model_average_for_mean_and_var=False,
fuse_with_relu=False,
use_global_stats=False):
super(BatchNorm, self).__init__()
assert bias_attr is not False, "bias_attr should not be False in batch_norm."
from ..layer_helper import LayerHelper
        self._helper = LayerHelper(
            'batch_norm', param_attr=param_attr, bias_attr=bias_attr,
            act=act, name=name)
if dtype == core.VarDesc.VarType.FP16:
self._dtype = core.VarDesc.VarType.FP32
else:
self._dtype = dtype
param_shape = [num_channels]
# create parameter
self._scale = self._helper.create_parameter(
attr=self._helper.param_attr,
shape=param_shape,
dtype=self._dtype,
default_initializer=Constant(1.0))
# setting stop_gradient=True to reduce computation
if use_global_stats and self._helper.param_attr.learning_rate == 0.:
self._scale.stop_gradient = True
self._bias = self._helper.create_parameter(
attr=self._helper.bias_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=True)
# setting stop_gradient=True to reduce computation
if use_global_stats and self._helper.bias_attr.learning_rate == 0.:
self._bias.stop_gradient = True
self._mean = self._helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=self._dtype)
self._mean.stop_gradient = True
self._variance = self._helper.create_parameter(
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var),
shape=param_shape,
dtype=self._dtype)
self._variance.stop_gradient = True
self._in_place = in_place
self._momentum = momentum
self._epsilon = epsilon
self._is_test = is_test
self._fuse_with_relu = fuse_with_relu
self._use_global_stats = use_global_stats
def _build_once(self, input):
pass
def forward(self, input):
# create output
# mean and mean_out share the same memory
mean_out = self._mean
# variance and variance out share the same memory
variance_out = self._variance
        saved_mean = self._helper.create_variable_for_type_inference(
            dtype=self._dtype, stop_gradient=True)
        saved_variance = self._helper.create_variable_for_type_inference(
            dtype=self._dtype, stop_gradient=True)
        batch_norm_out = input if self._in_place else self._helper.create_variable_for_type_inference(
            self._dtype)
self._helper.append_op(
type="batch_norm",
inputs={
"X": input,
"Scale": self._scale,
"Bias": self._bias,
"Mean": self._mean,
"Variance": self._variance
},
outputs={
"Y": batch_norm_out,
"MeanOut": mean_out,
"VarianceOut": variance_out,
"SavedMean": saved_mean,
"SavedVariance": saved_variance
},
attrs={
"momentum": self._momentum,
"epsilon": self._epsilon,
"is_test": self._is_test,
"use_mkldnn": False,
"fuse_with_relu": self._fuse_with_relu,
"use_global_stats": self._use_global_stats
})
return self._helper.append_activation(batch_norm_out)
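For reference, a minimal usage sketch of the new imperative BatchNorm layer (not part of the diff; the input shape and channel counts are illustrative assumptions):

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.imperative.nn import Conv2D, BatchNorm
from paddle.fluid.imperative.base import to_variable

with fluid.imperative.guard():
    x = to_variable(np.random.rand(4, 3, 32, 32).astype('float32'))
    conv = Conv2D(3, 16, 3, padding=1)  # NCHW input, 16 output channels
    bn = BatchNorm(16, act='relu')      # num_channels must match the conv output
    y = bn(conv(x))
    print(y._numpy().shape)             # expected: (4, 16, 32, 32)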
......@@ -2835,7 +2835,7 @@ def batch_norm(input,
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True)
# setting stop_gradient=True to reduce computation
if use_global_stats and helper.bias_attr.learning_rate == 0.:
        bias.stop_gradient = True
mean = helper.create_parameter(
attr=ParamAttr(
......
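The one-line change above makes the frozen-statistics path consistent: when the bias learning rate is zero it is the bias, not the scale (which is handled a few lines earlier), that should stop receiving gradients. A hedged sketch of a static-graph call that exercises this branch (names and shapes are illustrative):

import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[16, 8, 8], dtype='float32')
# use_global_stats=True with zero learning rates freezes both affine
# parameters as well as the running statistics of batch_norm.
y = fluid.layers.batch_norm(
    input=x,
    use_global_stats=True,
    param_attr=fluid.ParamAttr(learning_rate=0.),
    bias_attr=fluid.ParamAttr(learning_rate=0.))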
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import unittest
import numpy as np
import six
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.imperative.base import to_variable
from test_imperative_base import new_program_scope
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
"input_std": [0.229, 0.224, 0.225],
"learning_strategy": {
"name": "piecewise_decay",
"batch_size": 256,
"epochs": [30, 60, 90],
"steps": [0.1, 0.01, 0.001, 0.0001]
    },
    # base learning rate read by optimizer_setting below; 0.1 matches the
    # first piecewise step (assumed default)
    "lr": 0.1
}
def optimizer_setting(params):
ls = params["learning_strategy"]
if ls["name"] == "piecewise_decay":
if "total_images" not in params:
total_images = 1281167
else:
total_images = params["total_images"]
batch_size = ls["batch_size"]
step = int(total_images / batch_size + 1)
bd = [step * e for e in ls["epochs"]]
        base_lr = params["lr"]
        lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.Momentum(
learning_rate=fluid.layers.piecewise_decay(
boundaries=bd, values=lr),
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
return optimizer
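A short worked example of the schedule this helper produces under the defaults above (assuming the 1281167-image training set and a base lr of 0.1):

step = int(1281167 / 256 + 1)              # 5005 iterations per epoch
bd = [step * e for e in [30, 60, 90]]      # boundaries: [150150, 300300, 450450]
lr = [0.1 * (0.1**i) for i in range(4)]    # values: ~[0.1, 0.01, 0.001, 0.0001]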
class ConvBNLayer(fluid.imperative.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 filter_size,
                 stride=1,
                 groups=1,
                 act=None):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            num_channels,
            num_filters,
            filter_size,
            stride, (filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=None)

        self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(fluid.imperative.Layer):
    def __init__(self, num_channels, num_filters, stride, shortcut=False):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            num_channels=num_channels, num_filters=num_filters,
            filter_size=1, act='relu')
        self.conv1 = ConvBNLayer(
            num_channels=num_filters, num_filters=num_filters,
            filter_size=3, stride=stride, act='relu')
        self.conv2 = ConvBNLayer(
            num_channels=num_filters, num_filters=num_filters * 4,
            filter_size=1, act=None)

        # A 1x1 projection is only needed when this block changes the shape
        # of its input (the first block of each stage).
        if shortcut:
            self.short = ConvBNLayer(
                num_channels=num_channels, num_filters=num_filters * 4,
                filter_size=1, stride=stride)

        self.shortcut = shortcut
        self._num_channels_out = num_filters * 4
    def forward(self, inputs):
        y = self.conv0(inputs)
        y = self.conv1(y)
        y = self.conv2(y)

        # Add the (optionally projected) shortcut to the residual branch.
        short = self.short(inputs) if self.shortcut else inputs
        return fluid.layers.elementwise_add(x=short, y=y, act='relu')
class ResNet(fluid.imperative.Layer):
    def __init__(self, layers=50, class_dim=1000):
        super(ResNet, self).__init__()

        self.layers = layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(
            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
self.pool2d_max = Pool2D(
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
        self.bottleneck_block_list = []
        num_channels = 64
        for block in range(len(depth)):
            shortcut = True
            for i in range(depth[block]):
                bottleneck_block = BottleneckBlock(
                    num_channels=num_channels,
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1,
                    shortcut=shortcut)
                # track the channel count fed into the next block
                num_channels = bottleneck_block._num_channels_out
                self.bottleneck_block_list.append(bottleneck_block)
                shortcut = False
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
import math
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.out = FC(size=class_dim,
act='softmax',
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
y = self.conv(inputs)
y = self.pool2d_max(y)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
        y = self.out(y)
return y
class TestImperativeResnet(unittest.TestCase):
def test_resnet_cpu_float32(self):
seed = 90
with fluid.imperative.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
resnet = ResNet()
optimizer = optimizer_setting(train_parameters)
train_reader = paddle.batch(
paddle.dataset.flowers.train(), batch_size=256)
dy_param_init_value = {}
for batch_id, data in enumerate(train_reader()):
if batch_id >= 2:
break
                x_data = np.array(
                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                    -1, 1)
img = to_variable(x_data)
label = to_variable(y_data)
label._stop_gradient = True
                cost = resnet(img)
                loss = fluid.layers.cross_entropy(input=cost, label=label)
                avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy()
if batch_id == 0:
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
optimizer.minimize(avg_loss)
dy_param_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_value[param.name] = param._numpy()
# with new_program_scope():
# fluid.default_startup_program().random_seed = seed
# fluid.default_main_program().random_seed = seed
# exe = fluid.Executor(fluid.CPUPlace())
# # mnist = Conv2D(1, 20, 5)
# mnist = MNIST()
# sgd = SGDOptimizer(learning_rate=1e-3)
# train_reader = paddle.batch(
# paddle.dataset.mnist.train(), batch_size=128)
# img = fluid.layers.data(
# name='pixel', shape=[1, 28, 28], dtype='float32')
# label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# cost = mnist(img)
# loss = fluid.layers.reduce_mean(cost)
# sgd.minimize(loss)
# # initialize params and fetch them
# static_param_init_value = {}
# static_param_name_list = []
# for param in fluid.default_startup_program().global_block(
# ).all_parameters():
# static_param_name_list.append(param.name)
# out = exe.run(fluid.default_startup_program(),
# fetch_list=static_param_name_list)
# for i in range(len(static_param_name_list)):
# static_param_init_value[static_param_name_list[i]] = out[i]
# for batch_id, data in enumerate(train_reader()):
# if batch_id >= 2:
# break
# x_data = np.array(
# [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
# y_data = np.array([x[1] for x in data]).astype('int64').reshape(
# [128, 1])
# fetch_list = [loss.name]
# fetch_list.extend(static_param_name_list)
# out = exe.run(fluid.default_main_program(),
# feed={"pixel": x_data,
# "label": y_data},
# fetch_list=fetch_list)
# static_param_value = {}
# static_out = out[0]
# for i in range(1, len(out)):
# static_param_value[static_param_name_list[i - 1]] = out[i]
# for key, value in six.iteritems(static_param_init_value):
# self.assertTrue(
# np.allclose(value.all(), dy_param_init_value[key].all()))
# self.assertTrue(np.allclose(static_out.all(), dy_out.all()))
# for key, value in six.iteritems(static_param_value):
# self.assertTrue(np.allclose(value.all(), dy_param_value[key].all()))
if __name__ == '__main__':
unittest.main()