diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 426644ca9183fd325bf32eb995d57006c1c4b5f0..b7df4b8886d629e98225c95eae9a4f2ed9400710 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -57,15 +57,15 @@ class Autograd {
   Autograd() {}

   void RunBackward(VarBase* var) {
-    if (var->stop_gradient_) {
+    if (var->IsStopGradient()) {
       return;
     }
     VLOG(3) << "start autograd";

     std::deque<OpBase*> ready;
-    ready.push_back(var->pre_op_);
+    ready.push_back(var->PreOp());

-    std::map<OpBase*, int> dep_counts = ComputeDepCounts(var->pre_op_);
+    std::map<OpBase*, int> dep_counts = ComputeDepCounts(var->PreOp());

     while (!ready.empty()) {
       OpBase* ready_op = ready.front();
@@ -77,7 +77,7 @@ class Autograd {
         const std::vector<framework::Variable*>& ingrads = it.second;
         for (size_t i = 0; i < ingrads.size(); ++i) {
           if (!ingrads[i]) continue;
-          if (ready_op->input_vars_[it.first][i]->stop_gradient_) {
+          if (ready_op->input_vars_[it.first][i]->IsStopGradient()) {
             continue;
           }
           OpBase* pre_op = ready_op->pre_ops_[it.first][i];
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 34aa701c5b9ec432843c5ce995afaf20e30e4829..0b1077c640e076797ba7e0200dc8d0eb8bfcff16 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -100,22 +100,20 @@ class VarBase {
   // Owns `var` and `grad`
   VarBase(framework::Variable* var, VarBase* grad)
-      : pre_op_(nullptr),
-        pre_op_out_name_(),
-        pre_op_out_idx_(-1),
-        var_desc_(nullptr),
+      : var_desc_(nullptr),
         var_(var),
         grads_(grad),
-        stop_gradient_(false) {}
+        stop_gradient_(false),
+        pre_op_(nullptr),
+        pre_op_out_idx_(-1) {}

   explicit VarBase(bool stop_gradient)
-      : pre_op_(nullptr),
-        pre_op_out_name_(),
-        pre_op_out_idx_(-1),
-        var_desc_(nullptr),
+      : var_desc_(nullptr),
         var_(new framework::Variable()),
         grads_(stop_gradient ? nullptr : new VarBase(true)),
-        stop_gradient_(stop_gradient) {}
+        stop_gradient_(stop_gradient),
+        pre_op_(nullptr),
+        pre_op_out_idx_(-1) {}

   virtual ~VarBase() {
     if (var_) {
@@ -127,8 +125,27 @@ class VarBase {
     }
   }

+  OpBase* PreOp() const { return pre_op_; }
+  int PreOpOutIdx() const { return pre_op_out_idx_; }
+
+  void SetStopGradient(bool stop_gradient) { stop_gradient_ = stop_gradient; }
+  bool IsStopGradient() const { return stop_gradient_; }
+
   void RunBackward();

+  void TrackPreOp(OpBase* pre_op, const std::string& pre_op_out_name,
+                  int pre_op_out_idx, bool stop_gradient) {
+    pre_op_ = pre_op;
+    pre_op_out_name_ = pre_op_out_name;
+    pre_op_out_idx_ = pre_op_out_idx;
+    stop_gradient_ = stop_gradient;
+  }
+
+  void ClearGradient() {
+    delete grads_;
+    grads_ = new VarBase(true);
+  }
+
   framework::LoDTensor& GradValue();

   inline std::string GradName() const {
@@ -138,16 +155,16 @@
     return string::Sprintf("%s@IGrad", var_desc_->Name());
   }

-  OpBase* pre_op_;
-  std::string pre_op_out_name_;
-  int pre_op_out_idx_;
-
   framework::VarDesc* var_desc_;

   framework::Variable* var_;
   VarBase* grads_;

+ private:
   bool stop_gradient_;
+  OpBase* pre_op_;
+  std::string pre_op_out_name_;
+  int pre_op_out_idx_;
 };

 /* The wrapper for OpDesc which holds a OpDesc and a OpDesc of its
diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc
index 2878f5be883f25f221cf96f1cc2b2b6c7d243dea..843fee41f38f1247473ba06978248659495f8585 100644
--- a/paddle/fluid/imperative/tracer.cc
+++ b/paddle/fluid/imperative/tracer.cc
@@ -63,9 +63,9 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
       invars.push_back(inp->var_);

       vars[inp->var_desc_->Name()] = inp;
-      if (inp->pre_op_) {
-        op->pre_ops_[it.first].push_back(inp->pre_op_);
-        op->pre_ops_out_idx_[it.first].push_back(inp->pre_op_out_idx_);
+      if (inp->PreOp()) {
+        op->pre_ops_[it.first].push_back(inp->PreOp());
+        op->pre_ops_out_idx_[it.first].push_back(inp->PreOpOutIdx());
       } else {
         op->pre_ops_[it.first].push_back(nullptr);
       }
@@ -89,10 +89,7 @@ void Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
       } else {
         LOG(ERROR) << "tracer doesn't support yet";
       }
-      out->stop_gradient_ = stop_gradient;
-      out->pre_op_ = op;
-      out->pre_op_out_name_ = it.first;
-      out->pre_op_out_idx_ = i;
+      out->TrackPreOp(op, it.first, i, stop_gradient);

       VLOG(3) << "output vname " << out->var_desc_->Name() << " "
               << out->var_->IsInitialized();
@@ -167,9 +164,9 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
   op->input_vars_[PyLayer::kFwdInp] = inputs;
   op->output_vars_[PyLayer::kFwdOut] = PyLayer::Apply(op->forward_id_, inputs);
   for (VarBase* inp : inputs) {
-    if (inp->pre_op_) {
-      op->pre_ops_[PyLayer::kFwdInp].push_back(inp->pre_op_);
-      op->pre_ops_out_idx_[PyLayer::kFwdInp].push_back(inp->pre_op_out_idx_);
+    if (inp->PreOp()) {
+      op->pre_ops_[PyLayer::kFwdInp].push_back(inp->PreOp());
+      op->pre_ops_out_idx_[PyLayer::kFwdInp].push_back(inp->PreOpOutIdx());
     } else {
       op->pre_ops_[PyLayer::kFwdInp].push_back(nullptr);
     }
@@ -178,10 +175,7 @@ std::vector<VarBase*> Tracer::PyTrace(OpBase* op,
   auto& outputs = op->output_vars_[PyLayer::kFwdOut];
   for (size_t i = 0; i < outputs.size(); ++i) {
     VarBase* out = outputs[i];
-    out->stop_gradient_ = stop_gradient;
-    out->pre_op_ = op;
-    out->pre_op_out_name_ = PyLayer::kFwdOut;
-    out->pre_op_out_idx_ = i;
+    out->TrackPreOp(op, PyLayer::kFwdOut, i, stop_gradient);
   }
   if (!stop_gradient) {
     auto& grad_input_vars =
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index f3f4854a9efbcf5ab325e7f6aec81135c018dcd5..96fa428ee36bd8639d956623ec0554be7ae44501 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -133,6 +133,7 @@ PYBIND11_MODULE(core, m) {
            [](imperative::VarBase &self) { self.RunBackward(); })
       .def("_grad_name", &imperative::VarBase::GradName)
       .def("_grad_value", &imperative::VarBase::GradValue)
+      .def("_clear_gradient", &imperative::VarBase::ClearGradient)
       .def("_grad_ivar",
            [](const imperative::VarBase &self) { return self.grads_; },
            py::return_value_policy::reference)
@@ -147,9 +148,9 @@ PYBIND11_MODULE(core, m) {
            py::return_value_policy::reference)
       .def_property(
           "stop_gradient",
-          [](const imperative::VarBase &self) { return self.stop_gradient_; },
+          [](const imperative::VarBase &self) { return self.IsStopGradient(); },
           [](imperative::VarBase &self, bool stop_gradient) {
-            self.stop_gradient_ = stop_gradient;
+            self.SetStopGradient(stop_gradient);
           });

   py::class_<imperative::OpBase>(m, "OpBase", R"DOC()DOC")
diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py
index e9a9265931f432ebf7b2351cd6c74ece8b0f863a..569ca2a4f72a8d66dacd5d57773297e20d31d5a9 100644
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -389,6 +389,9 @@ class Variable(object):
     def _gradient(self):
         return np.array(self._ivar._grad_value())

+    def _clear_gradient(self):
+        self._ivar._clear_gradient()
+
     def __str__(self):
         return self.to_string(True)

diff --git a/python/paddle/fluid/imperative/layers.py b/python/paddle/fluid/imperative/layers.py
index f0fec03dba38870832275fe8c042fccc53c5d066..f457f56203eb2c1da62f4d8ad8915c322c822e0a 100644
--- a/python/paddle/fluid/imperative/layers.py
+++ b/python/paddle/fluid/imperative/layers.py
@@ -27,18 +27,25 @@ class Layer(core.Layer):
     """Layers composed of operators."""

     def __init__(self, dtype=core.VarDesc.VarType.FP32, name=None):
-        self._once_built = False
+        self._built = False
         self._dtype = dtype

+    def parameters(self):
+        return []
+
+    def clear_gradients(self):
+        for p in self.parameters():
+            p._clear_gradient()
+
     def _build_once(self, inputs):
         pass

     def __call__(self, *inputs):
-        if not self._once_built:
+        if not self._built:
             self._build_once(*inputs)
-            self._once_built = True

         outputs = self.forward(*inputs)
+        self._built = True
         return outputs

     def forward(self, *inputs):
diff --git a/python/paddle/fluid/imperative/nn.py b/python/paddle/fluid/imperative/nn.py
index 8754e5d4d0c8c829303f1fe9cd39ead36619ac3b..03fbfe76d120e30edbe7f88ca716141d150d3a9c 100644
--- a/python/paddle/fluid/imperative/nn.py
+++ b/python/paddle/fluid/imperative/nn.py
@@ -48,6 +48,7 @@ class Conv2D(layers.Layer):
         assert param_attr is not False, "param_attr should not be False here."
         super(Conv2D, self).__init__(name=name, dtype=dtype)

+        # TODO(minqiyang): Move this to the top.
         from ..layer_helper import LayerHelper
         self._helper = LayerHelper(
             type(self).__name__,
@@ -209,14 +210,25 @@ class FC(layers.Layer):
     def __init__(self,
                  size,
                  param_attr=None,
+                 bias_attr=None,
                  num_flatten_dims=1,
-                 dtype=core.VarDesc.VarType.FP32):
+                 dtype=core.VarDesc.VarType.FP32,
+                 act=None,
+                 name=None):
         super(FC, self).__init__()
         self._size = size
         self._num_flatten_dims = num_flatten_dims
         self._dtype = dtype
         from ..layer_helper import LayerHelper
-        self._helper = LayerHelper('FC', param_attr=param_attr)
+        self._helper = LayerHelper(
+            'FC',
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            act=act,
+            name=name)
+
+    def parameters(self):
+        return [self._w, self._b]

     def _build_once(self, input):
         input_shape = input.shape
@@ -247,4 +259,22 @@
             inputs={"X": [tmp]},
             outputs={"Out": out},
             attrs={"use_mkldnn": False})
-        return out
+
+        bias_attr = self._helper.bias_attr
+        if bias_attr:
+            # add bias
+            size = list(out.shape[1:])
+            if not self._built:
+                self._b = self._helper.create_parameter(
+                    attr=bias_attr, shape=size, dtype=out.dtype, is_bias=True)
+            bias_out = self._helper.create_variable_for_type_inference(
+                dtype=out.dtype)
+            self._helper.append_op(
+                type='elementwise_add',
+                inputs={'X': [out],
+                        'Y': [self._b]},
+                outputs={'Out': [bias_out]},
+                attrs={'axis': 1})
+            out = bias_out
+        # add activation
+        return self._helper.append_activation(out)
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_base.py b/python/paddle/fluid/tests/unittests/test_imperative_base.py
index 478cc13fb5bb775b3a40e674e70555fa50117836..1dd5348a8852d78fde73ab9ddf9d0015e903cb3f 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_base.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_base.py
@@ -21,10 +21,11 @@ from paddle.fluid import core


 @contextlib.contextmanager
-def new_program_scope():
-    prog = fluid.Program()
-    startup_prog = fluid.Program()
-    scope = fluid.core.Scope()
+def new_program_scope(main=None, startup=None, scope=None):
+    prog = main if main else fluid.Program()
+    startup_prog = startup if startup else fluid.Program()
+    scope = scope if scope else fluid.core.Scope()
     with fluid.scope_guard(scope):
         with fluid.program_guard(prog, startup_prog):
-            yield
+            with fluid.unique_name.guard():
+                yield
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_gan.py b/python/paddle/fluid/tests/unittests/test_imperative_gan.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fe286f85ec551946a9431f70d7012b4e7d79662
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_gan.py
@@ -0,0 +1,185 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import contextlib
+import unittest
+import numpy as np
+import six
+import sys
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.optimizer import SGDOptimizer
+from paddle.fluid.imperative.nn import Conv2D, Pool2D, FC
+from test_imperative_base import new_program_scope
+from paddle.fluid.imperative.base import to_variable
+
+
+class Discriminator(fluid.imperative.Layer):
+    def __init__(self):
+        super(Discriminator, self).__init__()
+        self._fc1 = FC(size=32, act='elu', name="d_fc1")
+        self._fc2 = FC(size=1, name="d_fc2")
+
+    def parameters(self):
+        return self._fc1.parameters() + self._fc2.parameters()
+
+    def forward(self, inputs):
+        x = self._fc1(inputs)
+        return self._fc2(x)
+
+
+class Generator(fluid.imperative.Layer):
+    def __init__(self):
+        super(Generator, self).__init__()
+        self._fc1 = FC(size=64, act='elu', name="g_fc1")
+        self._fc2 = FC(size=64, act='elu', name="g_fc2")
+        self._fc3 = FC(size=1, name="g_fc3")
+
+    def parameters(self):
+        return self._fc1.parameters() + self._fc2.parameters(
+        ) + self._fc3.parameters()
+
+    def forward(self, inputs):
+        x = self._fc1(inputs)
+        x = self._fc2(x)
+        return self._fc3(x)
+
+
+class TestImperativeMnist(unittest.TestCase):
+    def test_mnist_cpu_float32(self):
+        seed = 90
+
+        startup = fluid.Program()
+        startup.random_seed = seed
+        discriminate_p = fluid.Program()
+        generate_p = fluid.Program()
+        discriminate_p.random_seed = seed
+        generate_p.random_seed = seed
+
+        scope = fluid.core.Scope()
+        with new_program_scope(
+                main=discriminate_p, startup=startup, scope=scope):
+            discriminator = Discriminator()
+            generator = Generator()
+
+            img = fluid.layers.data(
+                name="img", shape=[2, 1], append_batch_size=False)
+            noise = fluid.layers.data(
+                name="noise", shape=[2, 2], append_batch_size=False)
+
+            d_real = discriminator(img)
+            d_loss_real = fluid.layers.reduce_mean(
+                fluid.layers.sigmoid_cross_entropy_with_logits(
+                    x=d_real,
+                    label=fluid.layers.fill_constant(
+                        shape=[2, 1], dtype='float32', value=1.0)))
+
+            d_fake = discriminator(generator(noise))
+            d_loss_fake = fluid.layers.reduce_mean(
+                fluid.layers.sigmoid_cross_entropy_with_logits(
+                    x=d_fake,
+                    label=fluid.layers.fill_constant(
+                        shape=[2, 1], dtype='float32', value=0.0)))
+
+            d_loss = d_loss_real + d_loss_fake
+
+            sgd = SGDOptimizer(learning_rate=1e-3)
+            sgd.minimize(d_loss)
+
+        with new_program_scope(main=generate_p, startup=startup, scope=scope):
+            discriminator = Discriminator()
+            generator = Generator()
+
+            noise = fluid.layers.data(
+                name="noise", shape=[2, 2], append_batch_size=False)
+
+            d_fake = discriminator(generator(noise))
+            g_loss = fluid.layers.reduce_mean(
+                fluid.layers.sigmoid_cross_entropy_with_logits(
+                    x=d_fake,
+                    label=fluid.layers.fill_constant(
+                        shape=[2, 1], dtype='float32', value=1.0)))
+
+            sgd = SGDOptimizer(learning_rate=1e-3)
+            sgd.minimize(g_loss)
+
+        exe = fluid.Executor(fluid.CPUPlace())
+        static_params = dict()
+        with fluid.scope_guard(scope):
+            img = np.ones([2, 1], np.float32)
+            noise = np.ones([2, 2], np.float32)
+            exe.run(startup)
+            static_d_loss = exe.run(discriminate_p,
+                                    feed={'img': img,
+                                          'noise': noise},
+                                    fetch_list=[d_loss])[0]
+            static_g_loss = exe.run(generate_p,
+                                    feed={'noise': noise},
+                                    fetch_list=[g_loss])[0]
+
+            # generate_p contains all parameters needed.
+            for param in generate_p.global_block().all_parameters():
+                static_params[param.name] = np.array(
+                    scope.find_var(param.name).get_tensor())
+
+        dy_params = dict()
+        with fluid.imperative.guard():
+            fluid.default_startup_program().random_seed = seed
+            fluid.default_main_program().random_seed = seed
+
+            discriminator = Discriminator()
+            generator = Generator()
+            sgd = SGDOptimizer(learning_rate=1e-3)
+
+            d_real = discriminator(to_variable(np.ones([2, 1], np.float32)))
+            d_loss_real = fluid.layers.reduce_mean(
+                fluid.layers.sigmoid_cross_entropy_with_logits(
+                    x=d_real, label=to_variable(np.ones([2, 1], np.float32))))
+
+            d_fake = discriminator(
+                generator(to_variable(np.ones([2, 2], np.float32))))
+            d_loss_fake = fluid.layers.reduce_mean(
+                fluid.layers.sigmoid_cross_entropy_with_logits(
+                    x=d_fake, label=to_variable(np.zeros([2, 1], np.float32))))
+
+            d_loss = d_loss_real + d_loss_fake
+            d_loss._backward()
+            sgd.minimize(d_loss)
+            discriminator.clear_gradients()
+            generator.clear_gradients()
+
+            d_fake = discriminator(
+                generator(to_variable(np.ones([2, 2], np.float32))))
+            g_loss = fluid.layers.reduce_mean(
+                fluid.layers.sigmoid_cross_entropy_with_logits(
+                    x=d_fake, label=to_variable(np.ones([2, 1], np.float32))))
+            g_loss._backward()
+            sgd.minimize(g_loss)
+            for p in discriminator.parameters():
+                dy_params[p.name] = p._numpy()
+            for p in generator.parameters():
+                dy_params[p.name] = p._numpy()
+
+            dy_g_loss = g_loss._numpy()
+            dy_d_loss = d_loss._numpy()
+
+        self.assertEqual(dy_g_loss, static_g_loss)
+        self.assertEqual(dy_d_loss, static_d_loss)
+        for k, v in six.iteritems(dy_params):
+            self.assertTrue(np.allclose(v, static_params[k]))
+
+
+if __name__ == '__main__':
+    unittest.main()
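
Usage note (not part of the patch): the sketch below illustrates how the APIs introduced in this diff, Layer.parameters(), Layer.clear_gradients() (backed by VarBase::ClearGradient), and the extended FC(bias_attr=..., act=..., name=...) constructor, are meant to be combined in a single imperative training step, mirroring the GAN test above. The MyModel class, layer sizes, and input data are illustrative assumptions only.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.imperative.nn import FC
from paddle.fluid.imperative.base import to_variable


class MyModel(fluid.imperative.Layer):
    """Hypothetical two-layer model, used only to illustrate the new API."""

    def __init__(self):
        super(MyModel, self).__init__()
        # FC now accepts bias_attr, act and name (added by this patch).
        self._fc1 = FC(size=16, act='elu', name="my_fc1")
        self._fc2 = FC(size=1, name="my_fc2")

    def parameters(self):
        # A layer aggregates its sub-layers' parameters by hand,
        # exactly as Discriminator/Generator do in the test above.
        return self._fc1.parameters() + self._fc2.parameters()

    def forward(self, inputs):
        return self._fc2(self._fc1(inputs))


with fluid.imperative.guard():
    model = MyModel()
    sgd = SGDOptimizer(learning_rate=1e-3)
    x = to_variable(np.ones([2, 4], np.float32))

    loss = fluid.layers.reduce_mean(model(x))
    loss._backward()
    sgd.minimize(loss)
    # New in this patch: drop the accumulated gradients before the next
    # forward/backward pass, as the GAN test does between the
    # discriminator and generator updates.
    model.clear_gradients()

Clearing gradients between the two minimize() calls is what keeps the discriminator update from leaking into the generator's gradients, which is why the test invokes clear_gradients() immediately after sgd.minimize(d_loss).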