From dbd4d058af35ea115c3f8d8a310403539a947b48 Mon Sep 17 00:00:00 2001
From: minqiyang
Date: Wed, 16 Jan 2019 19:58:00 +0800
Subject: [PATCH] Add static implementation and fix fc layer

---
 paddle/fluid/pybind/pybind.cc                 |   7 +
 python/paddle/fluid/framework.py              |   1 +
 python/paddle/fluid/imperative/base.py        |   4 +-
 python/paddle/fluid/imperative/nn.py          |  24 ++-
 python/paddle/fluid/optimizer.py              |   3 +
 .../tests/unittests/test_imperative_resnet.py | 142 ++++++++++--------
 6 files changed, 112 insertions(+), 69 deletions(-)

diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index f3f4854a9e..7ed91fc6ee 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -138,6 +138,13 @@ PYBIND11_MODULE(core, m) {
            py::return_value_policy::reference)
       .def("value", [](const imperative::VarBase &self) { return self.var_; },
            py::return_value_policy::reference)
+      .def("wait_device",
+           [](const imperative::VarBase &self) {
+             platform::DeviceContext *dev_ctx =
+                 platform::DeviceContextPool::Instance().Get(
+                     self.var_->Get<framework::LoDTensor>().place());
+             dev_ctx->Wait();
+           })
       .def_property(
           "desc",
           [](const imperative::VarBase &self) { return self.var_desc_; },

diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 012ceafe1e..56e19ea307 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -384,6 +384,7 @@ class Variable(object):
         self._ivar.stop_gradient = stop_gradient

     def _numpy(self):
+        self._ivar.wait_device()
         tensor = self._ivar.value().get_tensor()
         return np.array(tensor)

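Note on the two hunks above: imperative ops execute on the tensor's DeviceContext and may still be running asynchronously when control returns to Python, while _numpy() copies the tensor straight into a host ndarray. Calling wait_device() first drains that context, so the copy cannot observe half-written data. A minimal sketch of the resulting call pattern (the import path for to_variable is assumed from the file layout in this patch):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.imperative.base import to_variable

    with fluid.imperative.guard():
        x = to_variable(np.ones([2, 2], dtype='float32'))
        # ... imperative ops on x may still be in flight on the device ...
        arr = x._numpy()  # wait_device() runs first, then np.array(tensor)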
diff --git a/python/paddle/fluid/imperative/base.py b/python/paddle/fluid/imperative/base.py
index 83789dbe60..bd5798494d 100644
--- a/python/paddle/fluid/imperative/base.py
+++ b/python/paddle/fluid/imperative/base.py
@@ -45,9 +45,9 @@ def guard(device=0):


 def to_variable(value, block=None):
-    assert enabled(), "to_variable could only be called in imperative mode"
-
     if isinstance(value, np.ndarray):
+        assert enabled(), "to_variable could only be called in imperative mode"
+
         if not block:
             block = framework.default_main_program().current_block()
         py_var = framework.Variable(

diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py
index 6528de9a95..0b4c01f7aa 100644
--- a/python/paddle/fluid/imperative/nn.py
+++ b/python/paddle/fluid/imperative/nn.py
@@ -239,6 +239,17 @@ class FC(layers.Layer):
             shape=param_shape,
             dtype=self._dtype,
             is_bias=False)
+        print("create param: ", self._w.name, self._w.stop_gradient)
+
+        if self._helper.bias_attr:
+            size = list([self._size])
+            self._b = self._helper.create_parameter(
+                attr=self._helper.bias_attr,
+                shape=size,
+                dtype=self._dtype,
+                is_bias=True)
+        else:
+            self._b = None

     def forward(self, input):
         tmp = self._helper.create_variable_for_type_inference(self._dtype)
@@ -259,8 +270,17 @@ class FC(layers.Layer):
             outputs={"Out": pre_bias},
             attrs={"use_mkldnn": False})

-        pre_activation = self._helper.append_bias_op(
-            pre_bias, dim_start=self._num_flatten_dims)
+        if self._b:
+            pre_activation = self._helper.create_variable_for_type_inference(
+                dtype=self._dtype)
+            self._helper.append_op(
+                type='elementwise_add',
+                inputs={'X': [pre_bias],
+                        'Y': [self._b]},
+                outputs={'Out': [pre_activation]},
+                attrs={'axis': self._num_flatten_dims})
+        else:
+            pre_activation = pre_bias

         return self._helper.append_activation(pre_activation)
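The elementwise_add with attrs={'axis': self._num_flatten_dims} above is the drop-in replacement for the removed append_bias_op: with a rank-1 bias of shape [size], axis names the first non-flattened dimension, so the bias is broadcast across every leading batch dimension. A plain-numpy sketch of what FC.forward now computes (all names below are illustrative, not part of the patch):

    import numpy as np

    batch, in_dim, size = 4, 3, 10                        # size plays the role of self._size
    x = np.random.rand(batch, in_dim).astype('float32')   # input, num_flatten_dims=1
    w = np.random.rand(in_dim, size).astype('float32')    # self._w
    b = np.random.rand(size).astype('float32')            # self._b, shape [size]

    pre_bias = x.dot(w)            # the mul op emitted in forward()
    pre_activation = pre_bias + b  # elementwise_add, broadcast along axis 1
    assert pre_activation.shape == (batch, size)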
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 449eaa0970..b9d19d40ca 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -387,6 +387,9 @@ class Optimizer(object):

         params_grads = []
         for param in parameters:
+            if param.stop_gradient:
+                print("parameter:", param.name, "stop gradient, skip it")
+                continue
             # create gradient variable
             grad_var = Variable(
                 block=loss.block,

diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
index 594b751985..6a4fa70495 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py
@@ -31,11 +31,11 @@ train_parameters = {
     "input_std": [0.229, 0.224, 0.225],
     "learning_strategy": {
         "name": "piecewise_decay",
-        "batch_size": 256,
+        "batch_size": 1,
         "epochs": [30, 60, 90],
         "steps": [0.1, 0.01, 0.001, 0.0001]
     },
-    "batch_size": 256,
+    "batch_size": 1,
     "lr": 0.1,
     "total_images": 1281164,
 }
@@ -201,6 +201,7 @@ class TestImperativeResnet(unittest.TestCase):
     def test_resnet_gpu_float32(self):
         seed = 90

+        batch_size = train_parameters["batch_size"]
         with fluid.imperative.guard():
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
@@ -208,17 +209,21 @@ class TestImperativeResnet(unittest.TestCase):
             resnet = ResNet()
             optimizer = optimizer_setting(train_parameters)
             train_reader = paddle.batch(
-                paddle.dataset.flowers.train(), batch_size=256)
+                paddle.dataset.flowers.train(), batch_size=batch_size)

             dy_param_init_value = {}
+            for param in fluid.default_main_program().global_block(
+            ).all_parameters():
+                dy_param_init_value[param.name] = param._numpy()
+
             for batch_id, data in enumerate(train_reader()):
-                if batch_id >= 2:
+                if batch_id >= 1:
                     break

                 x_data = np.array(
                     [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
                 y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                    256, 1)
+                    batch_size, 1)

                 img = to_variable(x_data)
                 label = to_variable(y_data)
@@ -232,74 +237,81 @@ class TestImperativeResnet(unittest.TestCase):
                 if batch_id == 0:
                     for param in fluid.default_main_program().global_block(
                     ).all_parameters():
-                        dy_param_init_value[param.name] = param._numpy()
+                        if param.name not in dy_param_init_value:
+                            dy_param_init_value[param.name] = param._numpy()

                 avg_loss._backward()
                 optimizer.minimize(avg_loss)

+                dy_param_value = {}
                 for param in fluid.default_main_program().global_block(
                 ).all_parameters():
                     dy_param_value[param.name] = param._numpy()

-        # with new_program_scope():
-        #     fluid.default_startup_program().random_seed = seed
-        #     fluid.default_main_program().random_seed = seed
-
-        #     exe = fluid.Executor(fluid.CPUPlace())
-
-        #     # mnist = Conv2D(1, 20, 5)
-        #     mnist = MNIST()
-        #     sgd = SGDOptimizer(learning_rate=1e-3)
-        #     train_reader = paddle.batch(
-        #         paddle.dataset.mnist.train(), batch_size=128)
-
-        #     img = fluid.layers.data(
-        #         name='pixel', shape=[1, 28, 28], dtype='float32')
-        #     label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-        #     cost = mnist(img)
-        #     loss = fluid.layers.reduce_mean(cost)
-        #     sgd.minimize(loss)
-
-        #     # initialize params and fetch them
-        #     static_param_init_value = {}
-        #     static_param_name_list = []
-        #     for param in fluid.default_startup_program().global_block(
-        #     ).all_parameters():
-        #         static_param_name_list.append(param.name)
-
-        #     out = exe.run(fluid.default_startup_program(),
-        #                   fetch_list=static_param_name_list)
-
-        #     for i in range(len(static_param_name_list)):
-        #         static_param_init_value[static_param_name_list[i]] = out[i]
-
-        #     for batch_id, data in enumerate(train_reader()):
-        #         if batch_id >= 2:
-        #             break
-
-        #         x_data = np.array(
-        #             [x[0].reshape(1, 28, 28) for x in data]).astype('float32')
-        #         y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-        #             [128, 1])
-
-        #         fetch_list = [loss.name]
-        #         fetch_list.extend(static_param_name_list)
-        #         out = exe.run(fluid.default_main_program(),
-        #                       feed={"pixel": x_data,
-        #                             "label": y_data},
-        #                       fetch_list=fetch_list)
-
-        #         static_param_value = {}
-        #         static_out = out[0]
-        #         for i in range(1, len(out)):
-        #             static_param_value[static_param_name_list[i - 1]] = out[i]
-
-        #     for key, value in six.iteritems(static_param_init_value):
-        #         self.assertTrue(
-        #             np.allclose(value.all(), dy_param_init_value[key].all()))
-        #     self.assertTrue(np.allclose(static_out.all(), dy_out.all()))
-        #     for key, value in six.iteritems(static_param_value):
-        #         self.assertTrue(np.allclose(value.all(), dy_param_value[key].all()))
+        with new_program_scope():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+
+            exe = fluid.Executor(fluid.CUDAPlace(0))
+
+            resnet = ResNet()
+            optimizer = optimizer_setting(train_parameters)
+            train_reader = paddle.batch(
+                paddle.dataset.flowers.train(), batch_size=batch_size)
+
+            img = fluid.layers.data(
+                name='pixel', shape=[3, 224, 224], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            out = resnet(img)
+            loss = fluid.layers.cross_entropy(input=out, label=label)
+            avg_loss = fluid.layers.mean(x=loss)
+            optimizer.minimize(avg_loss)
+
+            # initialize params and fetch them
+            static_param_init_value = {}
+            static_param_name_list = []
+            for param in fluid.default_startup_program().global_block(
+            ).all_parameters():
+                static_param_name_list.append(param.name)
+
+            out = exe.run(fluid.default_startup_program(),
+                          fetch_list=static_param_name_list)
+
+            for i in range(len(static_param_name_list)):
+                static_param_init_value[static_param_name_list[i]] = out[i]
+
+            for batch_id, data in enumerate(train_reader()):
+                if batch_id >= 1:
+                    break
+
+                x_data = np.array(
+                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
+                y_data = np.array([x[1] for x in data]).astype('int64').reshape(
+                    [batch_size, 1])
+
+                fetch_list = [loss.name]
+                fetch_list.extend(static_param_name_list)
+                out = exe.run(fluid.default_main_program(),
+                              feed={"pixel": x_data,
+                                    "label": y_data},
+                              fetch_list=fetch_list)
+
+                static_param_value = {}
+                static_out = out[0]
+                for i in range(1, len(out)):
+                    static_param_value[static_param_name_list[i - 1]] = out[i]
+
+        self.assertTrue(np.allclose(static_out.all(), dy_out.all()))
+
+        for key, value in six.iteritems(static_param_init_value):
+            self.assertTrue(
+                np.allclose(value.all(), dy_param_init_value[key].all()))
+
+        for key, value in six.iteritems(static_param_value):
+            if not np.allclose(value.all(), dy_param_value[key].all()):
+                print(key)
+                print(value, dy_param_value[key])
+            self.assertTrue(np.allclose(value.all(), dy_param_value[key].all()))


 if __name__ == '__main__':
--
GitLab
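One caveat in the assertions of the new static-graph comparison (inherited from the commented-out block it replaces): ndarray.all() collapses an array to a single boolean, so np.allclose(value.all(), dy_param_value[key].all()) compares two scalars rather than the two parameter tensors element by element. A short numpy demonstration; an elementwise np.allclose(value, dy_param_value[key]) would be the stricter check:

    import numpy as np

    a = np.array([1.0, 2.0])
    b = np.array([3.0, 4.0])

    print(np.allclose(a.all(), b.all()))  # True: both sides reduce to the scalar True
    print(np.allclose(a, b))              # False: the elementwise comparison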