From cce766d7101b694fc0603433ab8f0b874f7591d8 Mon Sep 17 00:00:00 2001
From: minqiyang
Date: Mon, 1 Apr 2019 21:53:27 +0800
Subject: [PATCH] Reverse iterator op's input

test=develop
---
 paddle/fluid/imperative/layer.cc              |  22 +-
 python/paddle/fluid/framework.py              |  25 +-
 .../tests/unittests/test_imperative_basic.py  | 396 +++++++++---------
 3 files changed, 209 insertions(+), 234 deletions(-)

diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 093c72ef224..0310d0677b8 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -81,10 +81,6 @@ class TensorAddToFunctor : public boost::static_visitor<> {
 
 }  // namespace detail
 
-template <typename T = float, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
-
 void AddTo(Variable* src, Variable* dst, platform::Place place) {
   framework::Tensor* dst_tensor = dst->GetMutable<framework::LoDTensor>();
   framework::Tensor* src_tensor = src->GetMutable<framework::LoDTensor>();
@@ -99,18 +95,10 @@ void AddTo(Variable* src, Variable* dst, platform::Place place) {
                  "dst_numel %lld vs. src_numel %lld", dst_tensor->numel(),
                  src_tensor->numel());
 
-  auto result = EigenVector<>::Flatten(*dst_tensor);
-  auto in_0_e = EigenVector<>::Flatten(*dst_tensor);
-  auto in_1_e = EigenVector<>::Flatten(*src_tensor);
-  platform::DeviceContext* dev_ctx =
-      platform::DeviceContextPool::Instance().Get(place);
-  platform::CPUDeviceContext* x =
-      reinterpret_cast<platform::CPUDeviceContext*>(dev_ctx);
-  result.device(*x->eigen_device()) = in_0_e + in_1_e;
-  // detail::TensorAddToFunctor<float> func(
-  //     src_tensor->numel(), src_tensor->data<float>(),
-  //     dst_tensor->mutable_data<float>(place));
-  // boost::apply_visitor(func, place);
+  detail::TensorAddToFunctor<float> func(
+      src_tensor->numel(), src_tensor->data<float>(),
+      dst_tensor->mutable_data<float>(place));
+  boost::apply_visitor(func, place);
 }
 
 class Autograd {
@@ -134,7 +122,7 @@ class Autograd {
       std::map<std::string, std::vector<VarBase*>> input_grads =
           ready_op->ApplyGrad();
 
-      for (auto it : input_grads) {
+      for (auto it = input_grads.rbegin(); it != input_grads.rend(); ++it) {
         const std::vector<VarBase*>& ingrads = it.second;
         for (int64_t i = ingrads.size() - 1; i >= 0; --i) {
           if (!ingrads[i]) continue;
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index 4e0a3f97e40..7953d98bcbb 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -104,14 +104,14 @@ def cuda_places(device_ids=None):
     :code:`FLAGS_selected_gpus=0,1,2`, the returned list would
     be [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
     If :code:`FLAGS_selected_gpus` is not set, all visible
-    gpu places would be returned. 
+    gpu places would be returned.
 
     If :code:`device_ids` is not None, it should be the device
-    ids of gpus. For example, if :code:`device_ids=[0,1,2]`, 
-    the returned list would be 
+    ids of gpus. For example, if :code:`device_ids=[0,1,2]`,
+    the returned list would be
     [fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)].
-    
-    Args: 
+
+    Args:
         device_ids (None|list(int)|tuple(int)): gpu device id list.
 
     Returns:
@@ -133,11 +133,11 @@ def cuda_places(device_ids=None):
 def cpu_places(device_count=None):
     '''
     Create a list of :code:`fluid.CPUPlace` objects.
-    
+
    If :code:`device_count` is None, the device count would
-    be determined by environment variable :code:`CPU_NUM`. 
+    be determined by environment variable :code:`CPU_NUM`.
     If :code:`CPU_NUM` is not set, the device count would
-    be determined by :code:`multiprocessing.cpu_count()`. 
+    be determined by :code:`multiprocessing.cpu_count()`.
 
     Args:
         device_count (None|int): device number.
@@ -155,9 +155,9 @@ def cuda_pinned_places(device_count=None):
     Create a list of :code:`fluid.CUDAPinnedPlace` objects.
 
     If :code:`device_count` is None, the device count would
-    be determined by environment variable :code:`CPU_NUM`. 
+    be determined by environment variable :code:`CPU_NUM`.
     If :code:`CPU_NUM` is not set, the device count would
-    be determined by :code:`multiprocessing.cpu_count()`. 
+    be determined by :code:`multiprocessing.cpu_count()`.
 
     Args:
         device_count (None|int): device number.
@@ -2716,6 +2716,11 @@ class Program(object):
         # whether the program is optimized by memory_optimize_transpiler
         self.__is_mem_optimized = False
 
+        # if this program has been optimized by distributed optimizer
+        # fleet_opt will be given a value
+        self._fleet_opt = None
+        self._program_config = None
+
     @property
     def _is_mem_optimized(self):
         # if the program is optimized, operator input/outputs
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
index c32eb68e61f..13f2d662178 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -51,22 +51,23 @@ class MyPyLayer(fluid.dygraph.PyLayer):
 class MLP(fluid.dygraph.Layer):
     def __init__(self, name_scope):
         super(MLP, self).__init__(name_scope)
-        self._fc1 = FC(self.full_name(), 3)
-        # self._fc2 = FC(self.full_name(),
-        #                4)
-        # self._fc3 = FC(self.full_name(),
-        #                4)
-        self._fc_list = []
-        for i in range(100):
-            fc3 = FC(self.full_name(), 4)
-            self._fc_list.append(fc3)
+        self._fc1 = FC(self.full_name(),
+                       3,
+                       param_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)),
+                       bias_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
+        self._fc2 = FC(self.full_name(),
+                       4,
+                       param_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)),
+                       bias_attr=fluid.ParamAttr(
+                           initializer=fluid.initializer.Constant(value=0.1)))
 
     def forward(self, inputs):
         x = self._fc1(inputs)
-        y1 = self._fc2(x)
-        y2 = self._fc3(x)
-        z = fluid.layers.concat([y1, y2])
-        x = fluid.layers.reduce_sum(z)
+        x = self._fc2(x)
+        x = fluid.layers.reduce_sum(x)
         return x
 
 
@@ -191,215 +192,196 @@ class SimpleRNN(fluid.dygraph.Layer):
 
 
 class TestImperative(unittest.TestCase):
-    # def test_sum_op(self):
-    #     x = np.ones([2, 2], np.float32)
-    #     with fluid.dygraph.guard():
-    #         inputs = []
-    #         for _ in range(10):
-    #             inputs.append(fluid.dygraph.base.to_variable(x))
-    #         ret = fluid.layers.sums(inputs)
-    #         loss = fluid.layers.reduce_sum(ret)
-    #         loss._backward()
-    #         self.assertTrue(np.allclose(ret._numpy(), x * 10))
-    #         self.assertTrue(np.allclose(inputs[0]._gradient(), x))
-
-    # def test_layer(self):
-    #     with fluid.dygraph.guard():
-    #         cl = core.Layer()
-    #         cl.forward([])
-    #         l = fluid.dygraph.Layer("l")
-    #         self.assertRaises(NotImplementedError, l.forward, [])
-
-    # def test_pylayer_func_id(self):
-
-    #     with fluid.dygraph.guard():
-
-    #         class PyLayer1(fluid.dygraph.PyLayer):
-    #             def __init__(self):
-    #                 super(PyLayer1, self).__init__()
-
-    #             @staticmethod
-    #             def forward(input):
-    #                 return input
-
-    #             @staticmethod
-    #             def backward(input):
-    #                 return input
-
-    #         class PyLayer2(fluid.dygraph.PyLayer):
-    #             def __init__(self):
-    #                 super(PyLayer2, self).__init__()
-
-    #             @staticmethod
-    #             def forward(input):
-    #                 return input
-
-    #             @staticmethod
-    #             def backward(input):
-    #                 return input
-
-    #         py_layer_1 = PyLayer1()
-    #         py_layer_2 = PyLayer2()
-    #         py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
-    #         py_layer_2(fluid.dygraph.base.to_variable(np.ones([2, 2])))
-    #         id = py_layer_1.forward_id
-    #         self.assertGreater(id, 0)
-    #         self.assertEqual(py_layer_1.backward_id, id + 1)
-    #         self.assertEqual(py_layer_2.forward_id, id + 2)
-    #         self.assertEqual(py_layer_2.backward_id, id + 3)
-    #         py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
-    #         self.assertEqual(py_layer_1.forward_id, id)
-
-    # def test_pylayer(self):
-    #     np_inp = np.ones([2, 2], np.float32)
-    #     with fluid.dygraph.guard():
-    #         my_py_layer = MyPyLayer()
-    #         var_inp = fluid.dygraph.base.to_variable(np_inp)
-    #         outs = my_py_layer(var_inp)
-    #         dy_out = np.sum(outs[0]._numpy())
-    #         outs[0]._backward()
-    #         dy_grad = var_inp._gradient()
-
-    #     with new_program_scope():
-    #         inp = fluid.layers.data(
-    #             name="inp", shape=[2, 2], append_batch_size=False)
-    #         # TODO(panyx0718): Paddle doesn't diff against data `inp`.
-    #         x1 = inp * 1
-    #         # TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
-    #         x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
-    #         param_grads = fluid.backward.append_backward(
-    #             x, parameter_list=[x1.name])[0]
-    #         exe = fluid.Executor(fluid.CPUPlace(
-    #         ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-
-    #         static_out, static_grad = exe.run(
-    #             feed={inp.name: np_inp},
-    #             fetch_list=[x.name, param_grads[1].name])
-
-    #     self.assertTrue(np.allclose(dy_out, static_out))
-    #     self.assertTrue(np.allclose(dy_grad, static_grad))
-
-    # def test_layer_in_out(self):
-    #     np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
-    #     with fluid.dygraph.guard():
-    #         var_inp = fluid.dygraph.base.to_variable(np_inp)
-    #         l = MyLayer("my_layer")
-    #         x = l(var_inp)[0]
-    #         self.assertIsNotNone(x)
-    #         dy_out = x._numpy()
-    #         x._backward()
-    #         dy_grad = l._x_for_debug._gradient()
-
-    #     with new_program_scope():
-    #         inp = fluid.layers.data(
-    #             name="inp", shape=[3], append_batch_size=False)
-    #         l = MyLayer("my_layer")
-    #         x = l(inp)[0]
-    #         param_grads = fluid.backward.append_backward(
-    #             x, parameter_list=[l._x_for_debug.name])[0]
-    #         exe = fluid.Executor(fluid.CPUPlace(
-    #         ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-
-    #         static_out, static_grad = exe.run(
-    #             feed={inp.name: np_inp},
-    #             fetch_list=[x.name, param_grads[1].name])
-
-    #     self.assertTrue(np.allclose(dy_out, static_out))
-    #     self.assertTrue(np.allclose(dy_grad, static_grad))
+    def test_sum_op(self):
+        x = np.ones([2, 2], np.float32)
+        with fluid.dygraph.guard():
+            inputs = []
+            for _ in range(10):
+                inputs.append(fluid.dygraph.base.to_variable(x))
+            ret = fluid.layers.sums(inputs)
+            loss = fluid.layers.reduce_sum(ret)
+            loss._backward()
+            self.assertTrue(np.allclose(ret._numpy(), x * 10))
+            self.assertTrue(np.allclose(inputs[0]._gradient(), x))
+
+    def test_layer(self):
+        with fluid.dygraph.guard():
+            cl = core.Layer()
+            cl.forward([])
+            l = fluid.dygraph.Layer("l")
+            self.assertRaises(NotImplementedError, l.forward, [])
+
+    def test_pylayer_func_id(self):
+
+        with fluid.dygraph.guard():
+
+            class PyLayer1(fluid.dygraph.PyLayer):
+                def __init__(self):
+                    super(PyLayer1, self).__init__()
+
+                @staticmethod
+                def forward(input):
+                    return input
+
+                @staticmethod
+                def backward(input):
+                    return input
+
+            class PyLayer2(fluid.dygraph.PyLayer):
+                def __init__(self):
+                    super(PyLayer2, self).__init__()
+
+                @staticmethod
+                def forward(input):
+                    return input
+
+                @staticmethod
+                def backward(input):
+                    return input
+
+            py_layer_1 = PyLayer1()
+            py_layer_2 = PyLayer2()
+            py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
+            py_layer_2(fluid.dygraph.base.to_variable(np.ones([2, 2])))
+            id = py_layer_1.forward_id
+            self.assertGreater(id, 0)
+            self.assertEqual(py_layer_1.backward_id, id + 1)
+            self.assertEqual(py_layer_2.forward_id, id + 2)
+            self.assertEqual(py_layer_2.backward_id, id + 3)
+            py_layer_1(fluid.dygraph.base.to_variable(np.ones([2, 2])))
+            self.assertEqual(py_layer_1.forward_id, id)
+
+    def test_pylayer(self):
+        np_inp = np.ones([2, 2], np.float32)
+        with fluid.dygraph.guard():
+            my_py_layer = MyPyLayer()
+            var_inp = fluid.dygraph.base.to_variable(np_inp)
+            outs = my_py_layer(var_inp)
+            dy_out = np.sum(outs[0]._numpy())
+            outs[0]._backward()
+            dy_grad = var_inp._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[2, 2], append_batch_size=False)
+            # TODO(panyx0718): Paddle doesn't diff against data `inp`.
+            x1 = inp * 1
+            # TODO(panyx0718): If reduce_sum is skipped, the result is wrong.
+            x = fluid.layers.reduce_sum(fluid.layers.tanh(x1))
+            param_grads = fluid.backward.append_backward(
+                x, parameter_list=[x1.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[x.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
+    def test_layer_in_out(self):
+        np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
+        with fluid.dygraph.guard():
+            var_inp = fluid.dygraph.base.to_variable(np_inp)
+            l = MyLayer("my_layer")
+            x = l(var_inp)[0]
+            self.assertIsNotNone(x)
+            dy_out = x._numpy()
+            x._backward()
+            dy_grad = l._x_for_debug._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[3], append_batch_size=False)
+            l = MyLayer("my_layer")
+            x = l(inp)[0]
+            param_grads = fluid.backward.append_backward(
+                x, parameter_list=[l._x_for_debug.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[x.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
 
     def test_mlp(self):
-        seed = 90
         np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
-        with fluid.dygraph.guard(place=fluid.CPUPlace()):
-            fluid.default_startup_program().random_seed = seed
-            fluid.default_main_program().random_seed = seed
-
+        with fluid.dygraph.guard():
            var_inp = fluid.dygraph.base.to_variable(np_inp)
             mlp = MLP("mlp")
-            opt = fluid.optimizer.SGDOptimizer(learning_rate=0.001)
-            for i in range(100):
-                out = mlp(var_inp)
-                dy_out = out._numpy()
-                out._backward()
-                opt.minimize(out)
-                dy_grad = mlp._fc1._w._gradient()
-                dy_fc0_w0 = mlp._fc1._w._numpy()
-                mlp.clear_gradients()
+            out = mlp(var_inp)
+            dy_out = out._numpy()
+            out._backward()
+            dy_grad = mlp._fc1._w._gradient()
 
         with new_program_scope():
-            fluid.default_startup_program().random_seed = seed
-            fluid.default_main_program().random_seed = seed
-
             inp = fluid.layers.data(
                 name="inp", shape=[2, 2], append_batch_size=False)
             mlp = MLP("mlp")
             out = mlp(inp)
-            opt = fluid.optimizer.SGDOptimizer(learning_rate=0.001)
-            opt.minimize(out)
-            # param_grads = fluid.backward.append_backward(
-            #     out, parameter_list=[mlp._fc1._w.name])[0]
-            exe = fluid.Executor(fluid.CPUPlace())
+            param_grads = fluid.backward.append_backward(
+                out, parameter_list=[mlp._fc1._w.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
             exe.run(fluid.default_startup_program())
-            for i in range(100):
-                static_out, static_grad, static_fc0_w0 = exe.run(
-                    feed={inp.name: np_inp},
-                    fetch_list=[
-                        out.name, "mlp/MLP_0/FC_0.w_0@GRAD",
-                        "mlp/MLP_0/FC_0.w_0"
-                    ])
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[out.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
+        params = mlp.parameters(True)
+        self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name)
+        self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name)
+        self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name)
+        self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name)
+        self.assertEqual(len(params), 4)
+
+        sublayers = mlp.sublayers(True)
+        self.assertEqual(mlp._fc1, sublayers[0])
+        self.assertEqual(mlp._fc2, sublayers[1])
+        self.assertEqual(len(sublayers), 2)
+
+    def test_rnn(self):
+        np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
+                           [10.0, 11.0, 12.0]])
+        np_inp = np_inp.reshape((1, 4, 3))
+        np_inp = np_inp.astype(np.float32)
+        with fluid.dygraph.guard():
+            var_inp = fluid.dygraph.base.to_variable(np_inp)
+            var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
+            simple_rnn = SimpleRNN("simple_rnn")
+            outs, pre_hiddens = simple_rnn.forward(var_inp)
+            dy_out = outs[3]._numpy()
+            outs[3]._backward()
+            dy_grad_h2o = simple_rnn._cell._h2o_w._gradient()
+            dy_grad_h2h = simple_rnn._cell._h2h_w._gradient()
+            dy_grad_i2h = simple_rnn._cell._i2h_w._gradient()
 
-            print(dy_out, static_out)
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[1, 4, 3], append_batch_size=False)
+            simple_rnn = SimpleRNN("simple_rnn")
+            outs, pre_hiddens = simple_rnn(inp)
+            param_grads = fluid.backward.append_backward(outs[3])
+            exe = fluid.Executor(fluid.CPUPlace())
+            exe.run(fluid.default_startup_program())
+            static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[
+                    outs[3].name, param_grads[0][1].name,
+                    param_grads[1][1].name, param_grads[2][1].name
+                ])
         self.assertTrue(np.allclose(dy_out, static_out))
-        self.assertTrue(np.array_equal(dy_grad, static_grad))
-
-        print(dy_fc0_w0, static_fc0_w0)
-        #params = mlp.parameters(True)
-        #self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name)
-        #self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name)
-        #self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name)
-        #self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name)
-        #self.assertEqual(len(params), 4)
-
-        #sublayers = mlp.sublayers(True)
-        #self.assertEqual(mlp._fc1, sublayers[0])
-        #self.assertEqual(mlp._fc2, sublayers[1])
-        #self.assertEqual(len(sublayers), 2)
-
-    # def test_rnn(self):
-    #     np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
-    #                        [10.0, 11.0, 12.0]])
-    #     np_inp = np_inp.reshape((1, 4, 3))
-    #     np_inp = np_inp.astype(np.float32)
-    #     with fluid.dygraph.guard():
-    #         var_inp = fluid.dygraph.base.to_variable(np_inp)
-    #         var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
-    #         simple_rnn = SimpleRNN("simple_rnn")
-    #         outs, pre_hiddens = simple_rnn.forward(var_inp)
-    #         dy_out = outs[3]._numpy()
-    #         outs[3]._backward()
-    #         dy_grad_h2o = simple_rnn._cell._h2o_w._gradient()
-    #         dy_grad_h2h = simple_rnn._cell._h2h_w._gradient()
-    #         dy_grad_i2h = simple_rnn._cell._i2h_w._gradient()
-
-    #     with new_program_scope():
-    #         inp = fluid.layers.data(
-    #             name="inp", shape=[1, 4, 3], append_batch_size=False)
-    #         simple_rnn = SimpleRNN("simple_rnn")
-    #         outs, pre_hiddens = simple_rnn(inp)
-    #         param_grads = fluid.backward.append_backward(outs[3])
-    #         exe = fluid.Executor(fluid.CPUPlace())
-    #         exe.run(fluid.default_startup_program())
-    #         static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
-    #             feed={inp.name: np_inp},
-    #             fetch_list=[
-    #                 outs[3].name, param_grads[0][1].name,
-    #                 param_grads[1][1].name, param_grads[2][1].name
-    #             ])
-    #         self.assertTrue(np.allclose(dy_out, static_out))
-    #         self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
-    #         self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
-    #         self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
+        self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
+        self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
+        self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
 
 
 if __name__ == '__main__':
-- 
GitLab