diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py
index 4d3484d68340fe7fa840954c2a33de669536f684..df5591fb2a711edca9b69762ca044c493382b66e 100644
--- a/python/paddle/fluid/layer_helper.py
+++ b/python/paddle/fluid/layer_helper.py
@@ -437,10 +437,8 @@ class LayerHelper(object):
         # NOTE(dzhwinter): some activation support inplace compution.
         # NOTE(minqiyang): currently, we don't support inplace in imperative mode
         if not force_no_inplace and core.IsInplace(act_type):
-            print("inplace")
             tmp = input_var
         else:
-            print("not inplace")
             tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
         self.append_op(
             type=act_type,
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 808e1e6aa80744db1289094d7c1bad00002a4c3e..c23dfa01e76c21d0d162f2fed986e2eaf3a70a6d 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -84,6 +84,7 @@ list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
 list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
 list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
 list(REMOVE_ITEM TEST_OPS test_nearest_interp_op)
+list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
 foreach(TEST_OP ${TEST_OPS})
     py_test_modules(${TEST_OP} MODULES ${TEST_OP})
 endforeach(TEST_OP)
@@ -91,6 +92,8 @@ py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_
 py_test_modules(test_warpctc_op MODULES test_warpctc_op ENVS FLAGS_warpctc_dir=${WARPCTC_LIB_DIR} SERIAL)
 py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op SERIAL)
 py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op SERIAL)
+py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS
+    FLAGS_cudnn_deterministic=1)
 if(WITH_DISTRIBUTE)
     py_test_modules(test_dist_train MODULES test_dist_train SERIAL)
     set_tests_properties(test_listen_and_serv_op PROPERTIES TIMEOUT 20)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
index f0c10169484dcc3e94decf34a92435aa7febaf79..fcf0f4a2d8a25deb9d63727245c11bde75d810ba 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -26,7 +26,7 @@ from paddle.fluid.imperative.nn import Conv2D, Pool2D, BatchNorm, FC
 from paddle.fluid.imperative.base import to_variable
 from test_imperative_base import new_program_scope
 
-batch_size = 1
+batch_size = 8
 train_parameters = {
     "input_size": [3, 224, 224],
     "input_mean": [0.485, 0.456, 0.406],
@@ -57,7 +57,7 @@ def optimizer_setting(params):
         base_lr = params["lr"]
         lr = []
         lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)]
-        optimizer = fluid.optimizer.SGD(learning_rate=params["lr"])
+        optimizer = fluid.optimizer.SGD(learning_rate=0.01)
         # TODO(minqiyang): Add learning rate scheduler support to imperative mode
         #  optimizer = fluid.optimizer.Momentum(
         #  learning_rate=params["lr"],
@@ -89,11 +89,11 @@ class ConvBNLayer(fluid.imperative.Layer):
             act=None,
             bias_attr=None)
 
-        # self._batch_norm = BatchNorm(num_filters, act=act)
+        self._batch_norm = BatchNorm(num_filters, act=act)
 
     def forward(self, inputs):
         y = self._conv(inputs)
-        # y = self._batch_norm(y)
+        y = self._batch_norm(y)
 
         return y
 
@@ -204,229 +204,76 @@ class ResNet(fluid.imperative.Layer):
 
 
 class TestImperativeResnet(unittest.TestCase):
-    # def test_resnet_gpu_float32(self):
-    #     seed = 90
-
-    #     batch_size = train_parameters["batch_size"]
-    #     with fluid.imperative.guard():
-    #         fluid.default_startup_program().random_seed = seed
-    #         fluid.default_main_program().random_seed = seed
-
-    #         resnet = ResNet()
-    #         optimizer = optimizer_setting(train_parameters)
-    #         np.random.seed(seed)
-    #         import random
-    #         random.seed = seed
-    #         train_reader = paddle.batch(
-    #             paddle.dataset.flowers.train(use_xmap=False),
-    #             batch_size=batch_size)
-
-    #         dy_param_init_value = {}
-    #         for param in fluid.default_main_program().global_block(
-    #         ).all_parameters():
-    #             dy_param_init_value[param.name] = param._numpy()
-
-    #         for batch_id, data in enumerate(train_reader()):
-    #             if batch_id >= 1:
-    #                 break
-
-    #             dy_x_data = np.array(
-    #                 [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-    #             y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-    #                 batch_size, 1)
-
-    #             img = to_variable(dy_x_data)
-    #             label = to_variable(y_data)
-    #             label._stop_gradient = True
-
-    #             out = resnet(img)
-    #             loss = fluid.layers.cross_entropy(input=out, label=label)
-    #             avg_loss = fluid.layers.mean(x=loss)
-
-    #             dy_out = avg_loss._numpy()
-
-    #             if batch_id == 0:
-    #                 for param in fluid.default_main_program().global_block(
-    #                 ).all_parameters():
-    #                     if param.name not in dy_param_init_value:
-    #                         dy_param_init_value[param.name] = param._numpy()
-
-    #             avg_loss._backward()
-    #             dy_grad_value = {}
-    #             for param in fluid.default_main_program().global_block(
-    #             ).all_parameters():
-    #                 if not param.stop_gradient:
-    #                     np_array = np.array(param._ivar._grad_ivar().value()
-    #                                         .get_tensor())
-    #                     dy_grad_value[param.name + core.grad_var_suffix(
-    #                     )] = np_array
-
-    #             optimizer.minimize(avg_loss)
-
-    #             dy_param_value = {}
-    #             for param in fluid.default_main_program().global_block(
-    #             ).all_parameters():
-    #                 dy_param_value[param.name] = param._numpy()
-
-    #     with new_program_scope():
-    #         fluid.default_startup_program().random_seed = seed
-    #         fluid.default_main_program().random_seed = seed
-
-    #         exe = fluid.Executor(fluid.CUDAPlace(0))
-
-    #         resnet = ResNet()
-    #         optimizer = optimizer_setting(train_parameters)
-
-    #         np.random.seed(seed)
-    #         import random
-    #         random.seed = seed
-    #         train_reader = paddle.batch(
-    #             paddle.dataset.flowers.train(use_xmap=False),
-    #             batch_size=batch_size)
-
-    #         img = fluid.layers.data(
-    #             name='pixel', shape=[3, 224, 224], dtype='float32')
-    #         label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    #         out = resnet(img)
-    #         loss = fluid.layers.cross_entropy(input=out, label=label)
-    #         avg_loss = fluid.layers.mean(x=loss)
-    #         optimizer.minimize(avg_loss)
-
-    #         # initialize params and fetch them
-    #         static_param_init_value = {}
-    #         static_param_name_list = []
-    #         static_grad_name_list = []
-    #         for param in fluid.default_startup_program().global_block(
-    #         ).all_parameters():
-    #             static_param_name_list.append(param.name)
-    #         for param in fluid.default_main_program().global_block(
-    #         ).all_parameters():
-    #             if not param.stop_gradient:
-    #                 static_grad_name_list.append(param.name +
-    #                                              core.grad_var_suffix())
-
-    #         out = exe.run(fluid.default_startup_program(),
-    #                       fetch_list=static_param_name_list)
-
-    #         for i in range(len(static_param_name_list)):
-    #             static_param_init_value[static_param_name_list[i]] = out[i]
-
-    #         for batch_id, data in enumerate(train_reader()):
-    #             if batch_id >= 1:
-    #                 break
-
-    #             static_x_data = np.array(
-    #                 [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-    #             y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-    #                 [batch_size, 1])
-
-    #             fetch_list = [avg_loss.name]
-    #             fetch_list.extend(static_param_name_list)
-    #             fetch_list.extend(static_grad_name_list)
-    #             out = exe.run(fluid.default_main_program(),
-    #                           feed={"pixel": static_x_data,
-    #                                 "label": y_data},
-    #                           fetch_list=fetch_list)
-
-    #             static_param_value = {}
-    #             static_grad_value = {}
-    #             static_out = out[0]
-    #             param_start_pos = 1
-    #             grad_start_pos = len(static_param_name_list) + param_start_pos
-    #             for i in range(param_start_pos,
-    #                            len(static_param_name_list) + param_start_pos):
-    #                 static_param_value[static_param_name_list[
-    #                     i - param_start_pos]] = out[i]
-    #             for i in range(grad_start_pos,
-    #                            len(static_grad_name_list) + grad_start_pos):
-    #                 static_grad_value[static_grad_name_list[
-    #                     i - grad_start_pos]] = out[i]
-
-    #     self.assertTrue(np.allclose(static_out, dy_out))
-
-    #     self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
-    #     for key, value in six.iteritems(static_param_init_value):
-    #         self.assertTrue(np.allclose(value, dy_param_init_value[key]))
-
-    #     self.assertEqual(len(dy_grad_value), len(static_grad_value))
-    #     # TODO(minqiyang): find a way to align the gradient
-    #     # for key, value in six.iteritems(static_grad_value):
-    #     #     self.assertTrue(
-    #     #         np.allclose(value, dy_grad_value[key]))
-
-    #     self.assertEqual(len(dy_param_value), len(static_param_value))
-    #     # for key, value in six.iteritems(static_param_value):
-    #     #     self.assertTrue(np.allclose(value, dy_param_value[key]))
-
-    def test_resnet_cpu_float32(self):
+    def test_resnet_gpu_float32(self):
         seed = 90
 
         batch_size = train_parameters["batch_size"]
-        # with fluid.imperative.guard(device=None):
-        #     fluid.default_startup_program().random_seed = seed
-        #     fluid.default_main_program().random_seed = seed
-
-        #     resnet = ResNet()
-        #     optimizer = optimizer_setting(train_parameters)
-        #     np.random.seed(seed)
-        #     import random
-        #     random.seed = seed
-        #     train_reader = paddle.batch(
-        #         paddle.dataset.flowers.train(use_xmap=False),
-        #         batch_size=batch_size)
-
-        #     dy_param_init_value = {}
-        #     for param in fluid.default_main_program().global_block(
-        #     ).all_parameters():
-        #         dy_param_init_value[param.name] = param._numpy()
-
-        #     for batch_id, data in enumerate(train_reader()):
-        #         if batch_id >= 1:
-        #             break
-
-        #         dy_x_data = np.array(
-        #             [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-        #         y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-        #             batch_size, 1)
-
-        #         img = to_variable(dy_x_data)
-        #         label = to_variable(y_data)
-        #         label._stop_gradient = True
-
-        #         out = resnet(img)
-        #         loss = fluid.layers.cross_entropy(input=out, label=label)
-        #         avg_loss = fluid.layers.mean(x=loss)
-
-        #         dy_out = avg_loss._numpy()
-
-        #         if batch_id == 0:
-        #             for param in fluid.default_main_program().global_block(
-        #             ).all_parameters():
-        #                 if param.name not in dy_param_init_value:
-        #                     dy_param_init_value[param.name] = param._numpy()
-
-        #         avg_loss._backward()
-        #         dy_grad_value = {}
-        #         for param in fluid.default_main_program().global_block(
-        #         ).all_parameters():
-        #             if not param.stop_gradient:
-        #                 np_array = np.array(param._ivar._grad_ivar().value()
-        #                                     .get_tensor())
-        #                 dy_grad_value[param.name + core.grad_var_suffix(
-        #                 )] = np_array
-
-        #         optimizer.minimize(avg_loss)
-
-        #         dy_param_value = {}
-        #         for param in fluid.default_main_program().global_block(
-        #         ).all_parameters():
-        #             dy_param_value[param.name] = param._numpy()
+        with fluid.imperative.guard():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+
+            resnet = ResNet()
+            optimizer = optimizer_setting(train_parameters)
+            np.random.seed(seed)
+            import random
+            random.seed = seed
+            train_reader = paddle.batch(
+                paddle.dataset.flowers.train(use_xmap=False),
+                batch_size=batch_size)
+
+            dy_param_init_value = {}
+            for param in fluid.default_main_program().global_block(
+            ).all_parameters():
+                dy_param_init_value[param.name] = param._numpy()
+
+            for batch_id, data in enumerate(train_reader()):
+                if batch_id >= 1:
+                    break
+
+                dy_x_data = np.array(
+                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
+                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
+                    batch_size, 1)
+
+                img = to_variable(dy_x_data)
+                label = to_variable(y_data)
+                label._stop_gradient = True
+
+                out = resnet(img)
+                loss = fluid.layers.cross_entropy(input=out, label=label)
+                avg_loss = fluid.layers.mean(x=loss)
+
+                dy_out = avg_loss._numpy()
+
+                if batch_id == 0:
+                    for param in fluid.default_main_program().global_block(
+                    ).all_parameters():
+                        if param.name not in dy_param_init_value:
+                            dy_param_init_value[param.name] = param._numpy()
+
+                avg_loss._backward()
+
+                dy_grad_value = {}
+                for param in fluid.default_main_program().global_block(
+                ).all_parameters():
+                    if not param.stop_gradient:
+                        np_array = np.array(param._ivar._grad_ivar().value()
+                                            .get_tensor())
+                        dy_grad_value[param.name + core.grad_var_suffix(
+                        )] = np_array
+
+                optimizer.minimize(avg_loss)
+
+                dy_param_value = {}
+                for param in fluid.default_main_program().global_block(
+                ).all_parameters():
+                    dy_param_value[param.name] = param._numpy()
 
         with new_program_scope():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
 
-            exe = fluid.Executor(fluid.CPUPlace())
+            exe = fluid.Executor(fluid.CUDAPlace(0))
 
             resnet = ResNet()
             optimizer = optimizer_setting(train_parameters)
@@ -447,54 +294,140 @@ class TestImperativeResnet(unittest.TestCase):
             optimizer.minimize(avg_loss)
 
             # initialize params and fetch them
-            dy_param_init_value = {}
-            dy_param_name_list = []
-            dy_grad_name_list = []
+            static_param_init_value = {}
+            static_param_name_list = []
+            static_grad_name_list = []
             for param in fluid.default_startup_program().global_block(
             ).all_parameters():
-                dy_param_name_list.append(param.name)
+                static_param_name_list.append(param.name)
             for param in fluid.default_main_program().global_block(
             ).all_parameters():
                 if not param.stop_gradient:
-                    dy_grad_name_list.append(param.name + core.grad_var_suffix(
-                    ))
+                    static_grad_name_list.append(param.name +
+                                                 core.grad_var_suffix())
 
             out = exe.run(fluid.default_startup_program(),
-                          fetch_list=dy_param_name_list)
+                          fetch_list=static_param_name_list)
 
-            for i in range(len(dy_param_name_list)):
-                dy_param_init_value[dy_param_name_list[i]] = out[i]
+            for i in range(len(static_param_name_list)):
+                static_param_init_value[static_param_name_list[i]] = out[i]
 
             for batch_id, data in enumerate(train_reader()):
                 if batch_id >= 1:
                     break
 
-                dy_x_data = np.array(
+                static_x_data = np.array(
                     [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                 y_data = np.array([x[1] for x in data]).astype('int64').reshape(
                     [batch_size, 1])
 
                 fetch_list = [avg_loss.name]
-                fetch_list.extend(dy_param_name_list)
-                fetch_list.extend(dy_grad_name_list)
+                fetch_list.extend(static_param_name_list)
+                fetch_list.extend(static_grad_name_list)
                 out = exe.run(fluid.default_main_program(),
-                              feed={"pixel": dy_x_data,
+                              feed={"pixel": static_x_data,
                                     "label": y_data},
                               fetch_list=fetch_list)
 
-                dy_param_value = {}
-                dy_grad_value = {}
-                dy_out = out[0]
+                static_param_value = {}
+                static_grad_value = {}
+                static_out = out[0]
                 param_start_pos = 1
-                grad_start_pos = len(dy_param_name_list) + param_start_pos
+                grad_start_pos = len(static_param_name_list) + param_start_pos
                 for i in range(param_start_pos,
-                               len(dy_param_name_list) + param_start_pos):
-                    dy_param_value[dy_param_name_list[i -
-                                                      param_start_pos]] = out[i]
+                               len(static_param_name_list) + param_start_pos):
+                    static_param_value[static_param_name_list[
+                        i - param_start_pos]] = out[i]
                 for i in range(grad_start_pos,
-                               len(dy_grad_name_list) + grad_start_pos):
-                    dy_grad_value[dy_grad_name_list[i - grad_start_pos]] = out[
-                        i]
+                               len(static_grad_name_list) + grad_start_pos):
+                    static_grad_value[static_grad_name_list[
+                        i - grad_start_pos]] = out[i]
+
+        self.assertTrue(np.allclose(static_out, dy_out))
+
+        self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
+        for key, value in six.iteritems(static_param_init_value):
+            self.assertTrue(np.allclose(value, dy_param_init_value[key]))
+            self.assertTrue(np.isfinite(value.all()))
+            self.assertFalse(np.isnan(value.any()))
+
+        self.assertEqual(len(dy_grad_value), len(static_grad_value))
+        for key, value in six.iteritems(static_grad_value):
+            # TODO(minqiyang): find a way to align the gradient
+            self.assertTrue(np.allclose(value, dy_grad_value[key]))
+            self.assertTrue(np.isfinite(value.all()))
+            self.assertFalse(np.isnan(value.any()))
+
+        self.assertEqual(len(dy_param_value), len(static_param_value))
+        for key, value in six.iteritems(static_param_value):
+            self.assertTrue(np.allclose(value, dy_param_value[key]))
+            self.assertTrue(np.isfinite(value.all()))
+            self.assertFalse(np.isnan(value.any()))
+
+    def test_resnet_cpu_float32(self):
+        seed = 90
+
+        batch_size = train_parameters["batch_size"]
+        with fluid.imperative.guard(device=None):
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+
+            resnet = ResNet()
+            optimizer = optimizer_setting(train_parameters)
+            np.random.seed(seed)
+            import random
+            random.seed = seed
+            train_reader = paddle.batch(
+                paddle.dataset.flowers.train(use_xmap=False),
+                batch_size=batch_size)
+
+            dy_param_init_value = {}
+            for param in fluid.default_main_program().global_block(
+            ).all_parameters():
+                dy_param_init_value[param.name] = param._numpy()
+
+            for batch_id, data in enumerate(train_reader()):
+                if batch_id >= 1:
+                    break
+
+                dy_x_data = np.array(
+                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
+                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
+                    batch_size, 1)
+
+                img = to_variable(dy_x_data)
+                label = to_variable(y_data)
+                label._stop_gradient = True
+
+                out = resnet(img)
+                loss = fluid.layers.cross_entropy(input=out, label=label)
+                avg_loss = fluid.layers.mean(x=loss)
+
+                dy_out = avg_loss._numpy()
+
+                if batch_id == 0:
+                    for param in fluid.default_main_program().global_block(
+                    ).all_parameters():
+                        if param.name not in dy_param_init_value:
+                            dy_param_init_value[param.name] = param._numpy()
+
+                avg_loss._backward()
+
+                dy_grad_value = {}
+                for param in fluid.default_main_program().global_block(
+                ).all_parameters():
+                    if not param.stop_gradient:
+                        np_array = np.array(param._ivar._grad_ivar().value()
+                                            .get_tensor())
+                        dy_grad_value[param.name + core.grad_var_suffix(
+                        )] = np_array
+
+                optimizer.minimize(avg_loss)
+
+                dy_param_value = {}
+                for param in fluid.default_main_program().global_block(
+                ).all_parameters():
+                    dy_param_value[param.name] = param._numpy()
 
         with new_program_scope():
             fluid.default_startup_program().random_seed = seed
@@ -575,19 +508,20 @@ class TestImperativeResnet(unittest.TestCase):
         self.assertEqual(len(dy_param_init_value), len(static_param_init_value))
         for key, value in six.iteritems(static_param_init_value):
             self.assertTrue(np.allclose(value, dy_param_init_value[key]))
+            self.assertTrue(np.isfinite(value.all()))
+            self.assertFalse(np.isnan(value.any()))
 
         self.assertEqual(len(dy_grad_value), len(static_grad_value))
         for key, value in six.iteritems(static_grad_value):
-            if not np.allclose(value, dy_grad_value[key]):
-                # print(key, value, dy_grad_value[key])
-                print(key)
-            # self.assertTrue(
-            #     np.allclose(value, dy_grad_value[key]))
+            self.assertTrue(np.allclose(value, dy_grad_value[key]))
+            self.assertTrue(np.isfinite(value.all()))
+            self.assertFalse(np.isnan(value.any()))
 
         self.assertEqual(len(dy_param_value), len(static_param_value))
         for key, value in six.iteritems(static_param_value):
-            print(key)
-            # self.assertTrue(np.allclose(value, dy_param_value[key]))
+            self.assertTrue(np.allclose(value, dy_param_value[key]))
+            self.assertTrue(np.isfinite(value.all()))
+            self.assertFalse(np.isnan(value.any()))
 
 
 if __name__ == '__main__':