diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc
index c6c7141beed332d5d72c010fa68ddd89dcf70f9f..9f4696830c145747cce1cef0033526bfde103635 100644
--- a/paddle/fluid/framework/block_desc.cc
+++ b/paddle/fluid/framework/block_desc.cc
@@ -156,6 +156,7 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
 }
 
 void BlockDesc::RemoveOpInternal(const OpDesc *op_desc) {
+  // TODO(minqiyang): make this faster
   for (auto it = ops_.begin(); it != ops_.end(); ++it) {
     if (it->get() == op_desc) {
       ops_.erase(it);
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 8da378b6cf2062beac121feb8f2783b7b9bcd78c..2dca0b95372a0e40c8674a80eea1a8305cfafc80 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -226,6 +226,8 @@ class PYBIND11_HIDDEN OpBase {
         backward_hooks_() {}
 
   virtual ~OpBase() {
+    // TODO(minqiyang): remove op_desc from block_desc in tracer
+    //
     // reset all output vars' pre op
     for (auto iter : output_vars_) {
       for (VarBase* var : iter.second) {
@@ -233,13 +235,6 @@ class PYBIND11_HIDDEN OpBase {
       }
     }
 
-    // remove op desc from block desc
-    if (op_desc_) {
-      if (block_) {
-        block_->RemoveOpInternal(op_desc_);
-      }
-    }
-
     // release resource
     for (framework::OpDesc* desc : grad_op_descs_) {
       delete desc;
diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index cb6310137ed3fd3eb79c4ca8041721d544198e77..190e7b5608a0cdf156b449e919e108a0917a0980 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -19,7 +19,7 @@ import numpy as np
 from .wrapped_decorator import signature_safe_contextmanager
 from .core import VarDesc
 from . import unique_name
-from .imperative import base
+from .imperative import base as imperative_base
 
 __all__ = [
     'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
@@ -166,7 +166,7 @@ class ConstantInitializer(Initializer):
                 'force_cpu': self._force_cpu or force_init_on_cpu()
             },
             stop_gradient=True)
-        if not base.enabled():
+        if not imperative_base.enabled():
             var.op = op
         return op
 
@@ -246,7 +246,7 @@ class UniformInitializer(Initializer):
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
 
-        if not base.enabled():
+        if not imperative_base.enabled():
             var.op = op
         return op
 
@@ -325,7 +325,7 @@ class NormalInitializer(Initializer):
                 outputs={"Out": var},
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
-        if not base.enabled():
+        if not imperative_base.enabled():
            var.op = op
         return op
 
@@ -404,7 +404,7 @@ class TruncatedNormalInitializer(Initializer):
                 outputs={"Out": var},
                 attrs={"in_dtype": out_var.dtype,
                        "out_dtype": var.dtype})
-        if not base.enabled():
+        if not imperative_base.enabled():
             var.op = op
         return op
 
@@ -510,7 +510,7 @@ class XavierInitializer(Initializer):
                 "seed": self._seed
             },
             stop_gradient=True)
-        if not base.enabled():
+        if not imperative_base.enabled():
             var.op = op
         return op
 
@@ -611,7 +611,7 @@ class MSRAInitializer(Initializer):
                 "seed": self._seed
             },
             stop_gradient=True)
-        if not base.enabled():
+        if not imperative_base.enabled():
             var.op = op
         return op
 
@@ -710,7 +710,7 @@ class BilinearInitializer(Initializer):
                 'shape': list(shape),
                 value_name: values
             })
-        if not base.enabled():
+        if not imperative_base.enabled():
             var.op = op
         return op
 
@@ -769,7 +769,7 @@ class NumpyArrayInitializer(Initializer):
                 value_name: values
             },
             stop_gradient=True)
-        if not base.enabled():
+        if not imperative_base.enabled():
             var.op = op
         return op
 
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
index 4b099768ea7ce5c91ee8e3db078312a1ce4a0860..dae0c466ee5ea919688b29100f77f17f5f3b8c6d 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -191,126 +191,28 @@ class SimpleRNN(fluid.imperative.Layer):
         return outs, pre_hiddens
 
 
-# class TestImperative(unittest.TestCase):
-#     def test_sum_op(self):
-#         x = np.ones([2, 2], np.float32)
-#         with fluid.imperative.guard():
-#             inputs = []
-#             for _ in range(10):
-#                 inputs.append(fluid.imperative.base.to_variable(x))
-#             ret = fluid.layers.sums(inputs)
-#             loss = fluid.layers.reduce_sum(ret)
-#             loss._backward()
-#             self.assertTrue(np.allclose(ret._numpy(), x * 10))
-#             self.assertTrue(np.allclose(inputs[0]._gradient(), x))
-
-#     def test_layer(self):
-#         with fluid.imperative.guard():
-#             cl = core.Layer()
-#             cl.forward([])
-#             l = fluid.imperative.Layer("l")
-#             self.assertRaises(NotImplementedError, l.forward, [])
-
-#     def test_layer_in_out(self):
-#         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
-#         with fluid.imperative.guard():
-#             var_inp = fluid.imperative.base.to_variable(np_inp)
-#             l = MyLayer("my_layer")
-#             x = l(var_inp)[0]
-#             self.assertIsNotNone(x)
-#             dy_out = x._numpy()
-#             x._backward()
-#             dy_grad = l._x_for_debug._gradient()
-
-#         with new_program_scope():
-#             inp = fluid.layers.data(name="inp", shape=[3], append_batch_size=False)
-#             l = MyLayer("my_layer")
-#             x = l(inp)[0]
-#             param_grads = fluid.backward.append_backward(x, parameter_list=[l._x_for_debug.name])[0]
-#             exe = fluid.Executor(fluid.CPUPlace(
-#             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-
-#             static_out, static_grad = exe.run(feed={inp.name: np_inp},
-#                                               fetch_list=[x.name, param_grads[1].name])
-
-#         self.assertTrue(np.allclose(dy_out, static_out))
-#         self.assertTrue(np.allclose(dy_grad, static_grad))
-
-#         with fluid.imperative.guard():
-#             var_inp = fluid.imperative.base.to_variable(np_inp)
-#             mlp = MLP("mlp")
-#             out = mlp(var_inp)
-#             dy_out = out._numpy()
-#             out._backward()
-#             dy_grad = mlp._fc1._w._gradient()
-
-#         with new_program_scope():
-#             inp = fluid.layers.data(
-#                 name="inp", shape=[2, 2], append_batch_size=False)
-#             mlp = MLP("mlp")
-#             out = mlp(inp)
-#             param_grads = fluid.backward.append_backward(out, parameter_list=[mlp._fc1._w.name])[0]
-#             exe = fluid.Executor(fluid.CPUPlace(
-#             ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
-#             exe.run(fluid.default_startup_program())
-
-#             static_out, static_grad = exe.run(
-#                 feed={inp.name: np_inp},
-#                 fetch_list=[out.name, param_grads[1].name])
-
-#         self.assertTrue(np.allclose(dy_out, static_out))
-#         self.assertTrue(np.allclose(dy_grad, static_grad))
-
-#         params = mlp.parameters(True)
-#         self.assertEqual("mlp/MLP_0/FC_0_0.w_0", params[0].name)
-#         self.assertEqual("mlp/MLP_0/FC_0_0.b_0", params[1].name)
-#         self.assertEqual("mlp/MLP_0/FC_1_0.w_0", params[2].name)
-#         self.assertEqual("mlp/MLP_0/FC_1_0.b_0", params[3].name)
-#         self.assertEqual(len(params), 4)
-
-#         sublayers = mlp.sublayers(True)
-#         self.assertEqual(mlp._fc1, sublayers[0])
-#         self.assertEqual(mlp._fc2, sublayers[1])
-#         self.assertEqual(len(sublayers), 2)
-
-#     def test_rnn(self):
-#         np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
-#                            [10.0, 11.0, 12.0]])
-#         np_inp = np_inp.reshape((1, 4, 3))
-#         np_inp = np_inp.astype(np.float32)
-#         with fluid.imperative.guard():
-#             var_inp = fluid.imperative.base.to_variable(np_inp)
-#             var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
-#             simple_rnn = SimpleRNN("simple_rnn")
-#             outs, pre_hiddens = simple_rnn.forward(var_inp)
-#             dy_out = outs[3]._numpy()
-#             outs[3]._backward()
-#             dy_grad_h2o = simple_rnn._cell._h2o_w._gradient()
-#             dy_grad_h2h = simple_rnn._cell._h2h_w._gradient()
-#             dy_grad_i2h = simple_rnn._cell._i2h_w._gradient()
-
-#         with new_program_scope():
-#             inp = fluid.layers.data(
-#                 name="inp", shape=[1, 4, 3], append_batch_size=False)
-#             simple_rnn = SimpleRNN("simple_rnn")
-#             outs, pre_hiddens = simple_rnn(inp)
-#             param_grads = fluid.backward.append_backward(outs[3])
-#             exe = fluid.Executor(fluid.CPUPlace())
-#             exe.run(fluid.default_startup_program())
-#             static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
-#                 feed={inp.name: np_inp},
-#                 fetch_list=[
-#                     outs[3].name, param_grads[0][1].name,
-#                     param_grads[1][1].name, param_grads[2][1].name
-#                 ])
-#         self.assertTrue(np.allclose(dy_out, static_out))
-#         self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
-#         self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
-#         self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
-
-
-class TestImperativePyLayer(unittest.TestCase):
+class TestImperative(unittest.TestCase):
+    def test_sum_op(self):
+        x = np.ones([2, 2], np.float32)
+        with fluid.imperative.guard():
+            inputs = []
+            for _ in range(10):
+                inputs.append(fluid.imperative.base.to_variable(x))
+            ret = fluid.layers.sums(inputs)
+            loss = fluid.layers.reduce_sum(ret)
+            loss._backward()
+            self.assertTrue(np.allclose(ret._numpy(), x * 10))
+            self.assertTrue(np.allclose(inputs[0]._gradient(), x))
+
+    def test_layer(self):
+        with fluid.imperative.guard():
+            cl = core.Layer()
+            cl.forward([])
+            l = fluid.imperative.Layer("l")
+            self.assertRaises(NotImplementedError, l.forward, [])
+
     def test_pylayer_func_id(self):
+
         with fluid.imperative.guard():
 
             class PyLayer1(fluid.imperative.PyLayer):
@@ -378,6 +280,109 @@ class TestImperativePyLayer(unittest.TestCase):
         self.assertTrue(np.allclose(dy_out, static_out))
         self.assertTrue(np.allclose(dy_grad, static_grad))
 
+    def test_layer_in_out(self):
+        np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
+        with fluid.imperative.guard():
+            var_inp = fluid.imperative.base.to_variable(np_inp)
+            l = MyLayer("my_layer")
+            x = l(var_inp)[0]
+            self.assertIsNotNone(x)
+            dy_out = x._numpy()
+            x._backward()
+            dy_grad = l._x_for_debug._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[3], append_batch_size=False)
+            l = MyLayer("my_layer")
+            x = l(inp)[0]
+            param_grads = fluid.backward.append_backward(
+                x, parameter_list=[l._x_for_debug.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[x.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
+    def test_mlp(self):
+        np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+        with fluid.imperative.guard():
+            var_inp = fluid.imperative.base.to_variable(np_inp)
+            mlp = MLP("mlp")
+            out = mlp(var_inp)
+            dy_out = out._numpy()
+            out._backward()
+            dy_grad = mlp._fc1._w._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[2, 2], append_batch_size=False)
+            mlp = MLP("mlp")
+            out = mlp(inp)
+            param_grads = fluid.backward.append_backward(
+                out, parameter_list=[mlp._fc1._w.name])[0]
+            exe = fluid.Executor(fluid.CPUPlace(
+            ) if not core.is_compiled_with_cuda() else fluid.CUDAPlace(0))
+            exe.run(fluid.default_startup_program())
+
+            static_out, static_grad = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[out.name, param_grads[1].name])
+
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad, static_grad))
+
+        params = mlp.parameters(True)
+        self.assertEqual("mlp/MLP_0/FC_0_0.w_0", params[0].name)
+        self.assertEqual("mlp/MLP_0/FC_0_0.b_0", params[1].name)
+        self.assertEqual("mlp/MLP_0/FC_1_0.w_0", params[2].name)
+        self.assertEqual("mlp/MLP_0/FC_1_0.b_0", params[3].name)
+        self.assertEqual(len(params), 4)
+
+        sublayers = mlp.sublayers(True)
+        self.assertEqual(mlp._fc1, sublayers[0])
+        self.assertEqual(mlp._fc2, sublayers[1])
+        self.assertEqual(len(sublayers), 2)
+
+    def test_rnn(self):
+        np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
+                           [10.0, 11.0, 12.0]])
+        np_inp = np_inp.reshape((1, 4, 3))
+        np_inp = np_inp.astype(np.float32)
+        with fluid.imperative.guard():
+            var_inp = fluid.imperative.base.to_variable(np_inp)
+            var_inp = fluid.layers.reshape(var_inp, shape=[1, 4, 3])
+            simple_rnn = SimpleRNN("simple_rnn")
+            outs, pre_hiddens = simple_rnn.forward(var_inp)
+            dy_out = outs[3]._numpy()
+            outs[3]._backward()
+            dy_grad_h2o = simple_rnn._cell._h2o_w._gradient()
+            dy_grad_h2h = simple_rnn._cell._h2h_w._gradient()
+            dy_grad_i2h = simple_rnn._cell._i2h_w._gradient()
+
+        with new_program_scope():
+            inp = fluid.layers.data(
+                name="inp", shape=[1, 4, 3], append_batch_size=False)
+            simple_rnn = SimpleRNN("simple_rnn")
+            outs, pre_hiddens = simple_rnn(inp)
+            param_grads = fluid.backward.append_backward(outs[3])
+            exe = fluid.Executor(fluid.CPUPlace())
+            exe.run(fluid.default_startup_program())
+            static_out, static_grad_h2o, static_grad_h2h, static_grad_i2h = exe.run(
+                feed={inp.name: np_inp},
+                fetch_list=[
+                    outs[3].name, param_grads[0][1].name,
+                    param_grads[1][1].name, param_grads[2][1].name
+                ])
+        self.assertTrue(np.allclose(dy_out, static_out))
+        self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
+        self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
+        self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
+
 
 if __name__ == '__main__':
     unittest.main()
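Note (not part of the patch): every initializer.py hunk above applies the same gate, attaching the created op to the variable only when imperative mode is off. A minimal sketch of that pattern follows, assuming the era's public module path paddle.fluid.imperative.base; the helper name _maybe_attach_op is hypothetical and used only for illustration.

    # Illustrative sketch only, not part of the patch above.
    from paddle.fluid.imperative import base as imperative_base  # aliased, as in the patched import


    def _maybe_attach_op(var, op):  # hypothetical helper name
        # In static-graph mode the Variable keeps a back-reference to the op
        # that initializes it; in imperative mode the tracer executes ops
        # eagerly, so the back-reference is skipped.
        if not imperative_base.enabled():
            var.op = op
        return op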