diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc
index 3aa29cf2d8cd824ee594b4aac172e410acd01f2d..7c0b9d0782359c9ac53a76f1cec2c7b9bd194e7d 100644
--- a/paddle/fluid/imperative/layer.cc
+++ b/paddle/fluid/imperative/layer.cc
@@ -28,7 +28,6 @@
 #endif
 
 DECLARE_bool(use_mkldnn);
-
 namespace paddle {
 namespace imperative {
 
@@ -186,7 +185,7 @@ size_t VarBase::GradOpNum() const {
   return grad_node_ ? grad_node_->size() : 0;
 }
 
-void VarBase::ClearGradient() {
+void VarBase::ClearGradient(bool set_to_zero) {
   VLOG(4) << "ClearGradient " << Name();
   if (grad_var_) {
     if (grad_var_->Var().IsType<framework::SelectedRows>()) {
@@ -204,9 +203,13 @@ void VarBase::ClearGradient() {
       auto* grad_t =
           grad_var_->MutableVar()->GetMutable<framework::LoDTensor>();
       if (grad_t->IsInitialized()) {
-        auto* dev_ctx =
-            platform::DeviceContextPool::Instance().Get(grad_t->place());
-        operators::math::set_constant(*dev_ctx, grad_t, 0.0);
+        if (set_to_zero) {
+          auto* dev_ctx =
+              platform::DeviceContextPool::Instance().Get(grad_t->place());
+          operators::math::set_constant(*dev_ctx, grad_t, 0.0);
+        } else {
+          grad_t->clear();
+        }
 #ifdef PADDLE_WITH_MKLDNN
         if (FLAGS_use_mkldnn) ClearMKLDNNCache(grad_t->place());
 #endif
@@ -219,6 +222,28 @@ void VarBase::ClearGradient() {
   }
 }
 
+void VarBase::_GradientSetEmpty(bool is_empty) {
+  VLOG(4) << "Set gradient " << Name() << " is_empty:" << is_empty;
+  if (grad_var_) {
+    auto share_var = grad_var_->SharedVar();
+    if (share_var) {
+      share_var->SetIsEmpty(is_empty);
+    }
+  }
+}
+
+bool VarBase::_IsGradientSetEmpty() {
+  bool res = true;
+  if (grad_var_) {
+    auto share_var = grad_var_->SharedVar();
+    if (share_var) {
+      res = share_var->is_empty_;
+      VLOG(4) << "Check gradient " << Name() << " is empty:" << res;
+    }
+  }
+  return res;
+}
+
 std::shared_ptr<VarBase> VarBase::NewVarBase(const platform::Place& dst_place,
                                              const bool blocking) const {
   PADDLE_ENFORCE_EQ(
diff --git a/paddle/fluid/imperative/layer.h b/paddle/fluid/imperative/layer.h
index 6c5ee60bde5243cfe7695139516ce0081bf9ef19..ec5fb63f0d9339e8a511a59df05f515f01822597 100644
--- a/paddle/fluid/imperative/layer.h
+++ b/paddle/fluid/imperative/layer.h
@@ -222,7 +222,10 @@ class VarBase {
 
   const platform::Place Place() const { return var_->Place(); }
 
-  void ClearGradient();
+  void ClearGradient(bool set_to_zero = true);
+
+  void _GradientSetEmpty(bool is_empty = true);
+  bool _IsGradientSetEmpty();
 
   std::shared_ptr<VarBase> NewVarBase(const platform::Place& dst_place,
                                       const bool blocking) const;
diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index ced5d0390a02e3260db51b5d57adb1169adbe3d1..eb62e4903f333db9b1e6d1bad21f22a299dbc518 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -1480,7 +1480,8 @@ void BindImperative(py::module *m_ptr) {
         # one of the variables needed for gradient computation has been modified by an inplace operation.
 
       )DOC")
-      .def("clear_gradient", &imperative::VarBase::ClearGradient, R"DOC(
+      .def("clear_gradient", &imperative::VarBase::ClearGradient,
+           py::arg("set_to_zero") = true, R"DOC(
 
        Only for Tensor that has gradient, normally we use this for Parameters since other temporary Tensor doesen't has gradient.
 
@@ -1500,6 +1501,9 @@ void BindImperative(py::module *m_ptr) {
                 linear.weight.clear_gradient()
                 print("After clear_gradient, linear.weight.grad: {}".format(linear.weight.grad))
       )DOC")
+      .def("_gradient_set_empty", &imperative::VarBase::_GradientSetEmpty,
+           py::arg("set_is_empty") = true)
+      .def("_is_gradient_set_empty", &imperative::VarBase::_IsGradientSetEmpty)
       .def("clone",
            [](std::shared_ptr<imperative::VarBase> &self) {
              const auto &tensor = self->Var().Get<framework::LoDTensor>();
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py
new file mode 100644
index 0000000000000000000000000000000000000000..a82b333ce1386ff41848ddb319188de2bf9459a3
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_imperative_tensor_clear_gradient.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+import paddle
+from paddle.fluid.wrapped_decorator import wrap_decorator
+import unittest
+from unittest import TestCase
+import numpy as np
+
+
+def _dygraph_guard_(func):
+    def __impl__(*args, **kwargs):
+        if fluid.in_dygraph_mode():
+            return func(*args, **kwargs)
+        else:
+            with fluid.dygraph.guard():
+                return func(*args, **kwargs)
+
+    return __impl__
+
+
+dygraph_guard = wrap_decorator(_dygraph_guard_)
+
+
+class TestDygraphClearGradient(TestCase):
+    def setUp(self):
+        self.input_shape = [10, 2]
+
+    @dygraph_guard
+    def test_tensor_method_clear_gradient_case1(self):
+        input = paddle.uniform(self.input_shape)
+        linear = paddle.nn.Linear(2, 3)
+        out = linear(input)
+        out.backward()
+        linear.weight.clear_gradient()
+
+        # actual result
+        gradient_actual = linear.weight.grad
+        # expected result
+        gradient_expected = np.zeros([2, 3]).astype('float64')
+        self.assertTrue(np.allclose(gradient_actual.numpy(), gradient_expected))
+
+    @dygraph_guard
+    def test_tensor_method_clear_gradient_case2(self):
+        input = paddle.uniform(self.input_shape)
+        linear = paddle.nn.Linear(2, 3)
+        out = linear(input)
+        out.backward()
+        # default arg set_to_zero is True,
+        # so False means really clearing the gradient
+        linear.weight.clear_gradient(False)
+
+        # before ._gradient_set_empty(False),
+        # the return of ._is_gradient_set_empty() should be True
+        self.assertTrue(linear.weight._is_gradient_set_empty())
+
+        # reset, because ClearGradient will call SetIsEmpty(True), but this is not our expectation.
+        linear.weight._gradient_set_empty(False)
+        # after ._gradient_set_empty(False),
+        # the return of ._is_gradient_set_empty() should be False
+        self.assertFalse(linear.weight._is_gradient_set_empty())
+
+        # actual result
+        gradient_actual = linear.weight.grad
+        # expected result: after a real clear, no gradient data is left
+        self.assertTrue(gradient_actual is None or np.asarray(gradient_actual).size == 0)
+
+
+if __name__ == '__main__':
+    unittest.main()
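For reference, a minimal usage sketch (not part of the patch) of the behavior this change adds: with set_to_zero=False, clear_gradient releases the gradient buffer instead of filling it with zeros, which can reduce peak memory between iterations. The training-loop setup below (optimizer choice, shapes, learning rate) is illustrative only and assumes a Paddle build that already includes this patch.

# Illustrative sketch, not part of the patch.
import paddle

linear = paddle.nn.Linear(2, 3)
opt = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters())

for _ in range(3):
    x = paddle.uniform([10, 2])
    loss = linear(x).mean()
    loss.backward()
    opt.step()
    # set_to_zero=False (new in this patch) frees the gradient storage rather
    # than zero-filling it; the buffer is re-allocated on the next backward().
    linear.weight.clear_gradient(False)
    linear.bias.clear_gradient(False)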