Commit cafbd62e authored by minqiyang

Add FLAGS_cudnn_deterministic to unittest now

test=develop
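For reference, the same flag can also be set from Python before `paddle.fluid` is imported — a minimal sketch, assuming the flag is read from the process environment at import time (the same mechanism the `ENVS` clause in the CMake change below relies on):

```python
import os

# Assumption: paddle.fluid picks up FLAGS_* variables from the environment
# at import time, so the flag must be set before the import below.
os.environ["FLAGS_cudnn_deterministic"] = "1"

import paddle.fluid as fluid  # cuDNN should now select deterministic algorithms
```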
Parent be2d3ae6
......@@ -437,10 +437,8 @@ class LayerHelper(object):
# NOTE(dzhwinter): some activations support inplace computation.
# NOTE(minqiyang): currently, we don't support inplace in imperative mode
if not force_no_inplace and core.IsInplace(act_type):
print("inplace")
tmp = input_var
else:
print("not inplace")
tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
self.append_op(
type=act_type,
......
......@@ -84,6 +84,7 @@ list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list(REMOVE_ITEM TEST_OPS test_nearest_interp_op)
list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
endforeach(TEST_OP)
......@@ -91,6 +92,8 @@ py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_
py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL)
py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL)
py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS
FLAGS_cudnn_deterministic=1)
if(WITH_DISTRIBUTE)
py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
......
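`py_test_modules(... ENVS ...)` runs the listed test module in a process whose environment carries the given variables. A rough, hypothetical Python equivalent of what the new CMake rule arranges (the invocation is illustrative, not the actual test harness):

```python
import os
import subprocess
import sys

# Illustrative only: run the test module with FLAGS_cudnn_deterministic=1
# in its environment, roughly what py_test_modules(... ENVS ...) sets up.
env = dict(os.environ, FLAGS_cudnn_deterministic="1")
subprocess.check_call(
    [sys.executable, "-m", "unittest", "test_imperative_resnet"],
    env=env)
```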
......@@ -26,7 +26,7 @@ from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
from paddle.fluid.imperative.base import to_variable
from test_imperative_base import new_program_scope
batch_size = 1
batch_size = 8
train_parameters = {
"input_size": [3, 224, 224],
"input_mean": [0.485, 0.456, 0.406],
......@@ -57,7 +57,7 @@ def optimizer_setting(params):
base_lr = params["lr"]
lr = []
lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
# TODO(minqiyang): Add learning rate scheduler support to imperative mode
# optimizer = fluid.optimizer.Momentum(
# learning_rate=params["lr"],
......@@ -89,11 +89,11 @@ class ConvBNLayer(fluid.imperative.Layer):
act=None,
bias_attr=None)
# self._batch_norm = BatchNorm(num_filters, act=act)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
# y = self._batch_norm(y)
y = self._batch_norm(y)
return y
......@@ -204,229 +204,76 @@ class ResNet(fluid.imperative.Layer):
class TestImperativeResnet(unittest.TestCase):
# def test_resnet_gpu_float32(self):
# seed = 90
# batch_size = train_parameters["batch_size"]
# with fluid.imperative.guard():
# fluid.default_startup_program().random_seed = seed
# fluid.default_main_program().random_seed = seed
# resnet = ResNet()
# optimizer = optimizer_setting(train_parameters)
# np.random.seed(seed)
# import random
# random.seed = seed
# train_reader = paddle.batch(
# paddle.dataset.flowers.train(use_xmap=False),
# batch_size=batch_size)
# dy_param_init_value = {}
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# dy_param_init_value[param.name] = param._numpy()
# for batch_id, data in enumerate(train_reader()):
# if batch_id >= 1:
# break
# dy_x_data = np.array(
# [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
# y_data = np.array([x[1] for x in data]).astype('int64').reshape(
# batch_size, 1)
# img = to_variable(dy_x_data)
# label = to_variable(y_data)
# label._stop_gradient = True
# out = resnet(img)
# loss = fluid.layers.cross_entropy(input=out, label=label)
# avg_loss = fluid.layers.mean(x=loss)
# dy_out = avg_loss._numpy()
# if batch_id == 0:
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# if param.name not in dy_param_init_value:
# dy_param_init_value[param.name] = param._numpy()
# avg_loss._backward()
# dy_grad_value = {}
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# if not param.stop_gradient:
# np_array = np.array(param._ivar._grad_ivar().value()
# .get_tensor())
# dy_grad_value[param.name + core.grad_var_suffix(
# )] = np_array
# optimizer.minimize(avg_loss)
# dy_param_value = {}
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# dy_param_value[param.name] = param._numpy()
# with new_program_scope():
# fluid.default_startup_program().random_seed = seed
# fluid.default_main_program().random_seed = seed
# exe = fluid.Executor(fluid.CUDAPlace(0))
# resnet = ResNet()
# optimizer = optimizer_setting(train_parameters)
# np.random.seed(seed)
# import random
# random.seed = seed
# train_reader = paddle.batch(
# paddle.dataset.flowers.train(use_xmap=False),
# batch_size=batch_size)
# img = fluid.layers.data(
# name='pixel', shape=[3, 224, 224], dtype='float32')
# label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# out = resnet(img)
# loss = fluid.layers.cross_entropy(input=out, label=label)
# avg_loss = fluid.layers.mean(x=loss)
# optimizer.minimize(avg_loss)
# # initialize params and fetch them
# static_param_init_value = {}
# static_param_name_list = []
# static_grad_name_list = []
# for param in fluid.default_startup_program().global_block(
# ).all_parameters():
# static_param_name_list.append(param.name)
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# if not param.stop_gradient:
# static_grad_name_list.append(param.name +
# core.grad_var_suffix())
# out = exe.run(fluid.default_startup_program(),
# fetch_list=static_param_name_list)
# for i in range(len(static_param_name_list)):
# static_param_init_value[static_param_name_list[i]] = out[i]
# for batch_id, data in enumerate(train_reader()):
# if batch_id >= 1:
# break
# static_x_data = np.array(
# [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
# y_data = np.array([x[1] for x in data]).astype('int64').reshape(
# [batch_size, 1])
# fetch_list = [avg_loss.name]
# fetch_list.extend(static_param_name_list)
# fetch_list.extend(static_grad_name_list)
# out = exe.run(fluid.default_main_program(),
# feed={"pixel": static_x_data,
# "label": y_data},
# fetch_list=fetch_list)
# static_param_value = {}
# static_grad_value = {}
# static_out = out[0]
# param_start_pos = 1
# grad_start_pos = len(static_param_name_list) + param_start_pos
# for i in range(param_start_pos,
# len(static_param_name_list) + param_start_pos):
# static_param_value[static_param_name_list[
# i - param_start_pos]] = out[i]
# for i in range(grad_start_pos,
# len(static_grad_name_list) + grad_start_pos):
# static_grad_value[static_grad_name_list[
# i - grad_start_pos]] = out[i]
# self.assertTrue(np.allclose(static_out, dy_out))
# self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
# for key, value in six.iteritems(static_param_init_value):
# self.assertTrue(np.allclose(value, dy_param_init_value[key]))
# self.assertEqual(len(dy_grad_value), len(static_grad_value))
# # TODO(minqiyang): find a way to align the gradient
# # for key, value in six.iteritems(static_grad_value):
# # self.assertTrue(
# # np.allclose(value, dy_grad_value[key]))
# self.assertEqual(len(dy_param_value), len(static_param_value))
# # for key, value in six.iteritems(static_param_value):
# # self.assertTrue(np.allclose(value, dy_param_value[key]))
def test_resnet_cpu_float32(self):
def test_resnet_gpu_float32(self):
seed = 90
batch_size = train_parameters["batch_size"]
# with fluid.imperative.guard(device=None):
# fluid.default_startup_program().random_seed = seed
# fluid.default_main_program().random_seed = seed
# resnet = ResNet()
# optimizer = optimizer_setting(train_parameters)
# np.random.seed(seed)
# import random
# random.seed = seed
# train_reader = paddle.batch(
# paddle.dataset.flowers.train(use_xmap=False),
# batch_size=batch_size)
# dy_param_init_value = {}
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# dy_param_init_value[param.name] = param._numpy()
# for batch_id, data in enumerate(train_reader()):
# if batch_id >= 1:
# break
# dy_x_data = np.array(
# [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
# y_data = np.array([x[1] for x in data]).astype('int64').reshape(
# batch_size, 1)
# img = to_variable(dy_x_data)
# label = to_variable(y_data)
# label._stop_gradient = True
# out = resnet(img)
# loss = fluid.layers.cross_entropy(input=out, label=label)
# avg_loss = fluid.layers.mean(x=loss)
# dy_out = avg_loss._numpy()
# if batch_id == 0:
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# if param.name not in dy_param_init_value:
# dy_param_init_value[param.name] = param._numpy()
# avg_loss._backward()
# dy_grad_value = {}
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# if not param.stop_gradient:
# np_array = np.array(param._ivar._grad_ivar().value()
# .get_tensor())
# dy_grad_value[param.name + core.grad_var_suffix(
# )] = np_array
# optimizer.minimize(avg_loss)
# dy_param_value = {}
# for param in fluid.default_main_program().global_block(
# ).all_parameters():
# dy_param_value[param.name] = param._numpy()
with fluid.imperative.guard():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
resnet = ResNet()
optimizer = optimizer_setting(train_parameters)
np.random.seed(seed)
import random
random.seed = seed
train_reader = paddle.batch(
paddle.dataset.flowers.train(use_xmap=False),
batch_size=batch_size)
dy_param_init_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_init_value[param.name] = param._numpy()
for batch_id, data in enumerate(train_reader()):
if batch_id >= 1:
break
dy_x_data = np.array(
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
batch_size, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy()
if batch_id == 0:
for param in fluid.default_main_program().global_block(
).all_parameters():
if param.name not in dy_param_init_value:
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
dy_grad_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
if not param.stop_gradient:
np_array = np.array(param._ivar._grad_ivar().value()
.get_tensor())
dy_grad_value[param.name + core.grad_var_suffix(
)] = np_array
optimizer.minimize(avg_loss)
dy_param_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_value[param.name] = param._numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
exe = fluid.Executor(fluid.CPUPlace())
exe = fluid.Executor(fluid.CUDAPlace(0))
resnet = ResNet()
optimizer = optimizer_setting(train_parameters)
......@@ -447,54 +294,140 @@ class TestImperativeResnet(unittest.TestCase):
optimizer.minimize(avg_loss)
# initialize params and fetch them
dy_param_init_value = {}
dy_param_name_list = []
dy_grad_name_list = []
static_param_init_value = {}
static_param_name_list = []
static_grad_name_list = []
for param in fluid.default_startup_program().global_block(
).all_parameters():
dy_param_name_list.append(param.name)
static_param_name_list.append(param.name)
for param in fluid.default_main_program().global_block(
).all_parameters():
if not param.stop_gradient:
dy_grad_name_list.append(param.name + core.grad_var_suffix(
))
static_grad_name_list.append(param.name +
core.grad_var_suffix())
out = exe.run(fluid.default_startup_program(),
fetch_list=dy_param_name_list)
fetch_list=static_param_name_list)
for i in range(len(dy_param_name_list)):
dy_param_init_value[dy_param_name_list[i]] = out[i]
for i in range(len(static_param_name_list)):
static_param_init_value[static_param_name_list[i]] = out[i]
for batch_id, data in enumerate(train_reader()):
if batch_id >= 1:
break
dy_x_data = np.array(
static_x_data = np.array(
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
[batch_size, 1])
fetch_list = [avg_loss.name]
fetch_list.extend(dy_param_name_list)
fetch_list.extend(dy_grad_name_list)
fetch_list.extend(static_param_name_list)
fetch_list.extend(static_grad_name_list)
out = exe.run(fluid.default_main_program(),
feed={"pixel": dy_x_data,
feed={"pixel": static_x_data,
"label": y_data},
fetch_list=fetch_list)
dy_param_value = {}
dy_grad_value = {}
dy_out = out[0]
static_param_value = {}
static_grad_value = {}
static_out = out[0]
param_start_pos = 1
grad_start_pos = len(dy_param_name_list) + param_start_pos
grad_start_pos = len(static_param_name_list) + param_start_pos
for i in range(param_start_pos,
len(dy_param_name_list) + param_start_pos):
dy_param_value[dy_param_name_list[i -
param_start_pos]] = out[i]
len(static_param_name_list) + param_start_pos):
static_param_value[static_param_name_list[
i - param_start_pos]] = out[i]
for i in range(grad_start_pos,
len(dy_grad_name_list) + grad_start_pos):
dy_grad_value[dy_grad_name_list[i - grad_start_pos]] = out[
i]
len(static_grad_name_list) + grad_start_pos):
static_grad_value[static_grad_name_list[
i - grad_start_pos]] = out[i]
self.assertTrue(np.allclose(static_out, dy_out))
self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
for key, value in six.iteritems(static_param_init_value):
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
self.assertEqual(len(dy_grad_value), len(static_grad_value))
for key, value in six.iteritems(static_grad_value):
# TODO(minqiyang): find a way to align the gradient
self.assertTrue(np.allclose(value, dy_grad_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
self.assertEqual(len(dy_param_value), len(static_param_value))
for key, value in six.iteritems(static_param_value):
self.assertTrue(np.allclose(value, dy_param_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
def test_resnet_cpu_float32(self):
seed = 90
batch_size = train_parameters["batch_size"]
with fluid.imperative.guard(device=None):
fluid.default_startup_program().random_seed = seed
fluid.default_main_program().random_seed = seed
resnet = ResNet()
optimizer = optimizer_setting(train_parameters)
np.random.seed(seed)
import random
random.seed = seed
train_reader = paddle.batch(
paddle.dataset.flowers.train(use_xmap=False),
batch_size=batch_size)
dy_param_init_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_init_value[param.name] = param._numpy()
for batch_id, data in enumerate(train_reader()):
if batch_id >= 1:
break
dy_x_data = np.array(
[x[0].reshape(3, 224, 224) for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(
batch_size, 1)
img = to_variable(dy_x_data)
label = to_variable(y_data)
label._stop_gradient = True
out = resnet(img)
loss = fluid.layers.cross_entropy(input=out, label=label)
avg_loss = fluid.layers.mean(x=loss)
dy_out = avg_loss._numpy()
if batch_id == 0:
for param in fluid.default_main_program().global_block(
).all_parameters():
if param.name not in dy_param_init_value:
dy_param_init_value[param.name] = param._numpy()
avg_loss._backward()
dy_grad_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
if not param.stop_gradient:
np_array = np.array(param._ivar._grad_ivar().value()
.get_tensor())
dy_grad_value[param.name + core.grad_var_suffix(
)] = np_array
optimizer.minimize(avg_loss)
dy_param_value = {}
for param in fluid.default_main_program().global_block(
).all_parameters():
dy_param_value[param.name] = param._numpy()
with new_program_scope():
fluid.default_startup_program().random_seed = seed
......@@ -575,19 +508,20 @@ class TestImperativeResnet(unittest.TestCase):
self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
for key, value in six.iteritems(static_param_init_value):
self.assertTrue(np.allclose(value, dy_param_init_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
self.assertEqual(len(dy_grad_value), len(static_grad_value))
for key, value in six.iteritems(static_grad_value):
if not np.allclose(value, dy_grad_value[key]):
# print(key, value, dy_grad_value[key])
print(key)
# self.assertTrue(
# np.allclose(value, dy_grad_value[key]))
self.assertTrue(np.allclose(value, dy_grad_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
self.assertEqual(len(dy_param_value), len(static_param_value))
for key, value in six.iteritems(static_param_value):
print(key)
# self.assertTrue(np.allclose(value, dy_param_value[key]))
self.assertTrue(np.allclose(value, dy_param_value[key]))
self.assertTrue(np.isfinite(value.all()))
self.assertFalse(np.isnan(value.any()))
if __name__ == '__main__':
......
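With cuDNN forced into deterministic mode, the strict `np.allclose` checks on gradients and parameters re-enabled above become meaningful: without determinism, run-to-run differences on the order of float32 rounding noise already fail the default tolerances. A small, self-contained numpy sketch of the effect (illustrative values only):

```python
import numpy as np

grad = np.random.RandomState(90).randn(64, 3, 7, 7).astype("float32")

# Deterministic kernels: repeated runs produce the identical tensor.
assert np.allclose(grad, grad.copy())

# Non-deterministic accumulation order: perturbations comparable to
# float32 rounding noise are enough to break the default tolerances.
noise = np.random.RandomState(0).randn(*grad.shape).astype("float32")
noisy = grad + np.float32(1e-5) * noise
assert not np.allclose(grad, noisy)
```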