diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc
index 2ed44ce489934f7696b09fdbffb156bada3ec17c..544e7c8fe85d63e39a43f3fe3184e4f9d11426e4 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.cc
+++ b/paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -28,33 +28,40 @@ namespace egr {
 static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
-                            const paddle::experimental::Tensor& t) {
-  if (!tensor->defined() || !tensor->initialized()) {
-    // Simply copy tensor->impl
+                            const paddle::experimental::Tensor& t,
+                            bool is_fake_empty) {
+  if (is_fake_empty) {
     *tensor = t;
   } else {
-    // Accumulation
-    if (LIKELY(t.is_dense_tensor())) {
-      if (LIKELY(tensor->is_dense_tensor())) {
-        paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t, tensor);
+    if (!tensor->defined() || !tensor->initialized()) {
+      // Simply copy tensor->impl
+      *tensor = t;
+    } else {
+      // Accumulation
+      if (LIKELY(t.is_dense_tensor())) {
+        if (LIKELY(tensor->is_dense_tensor())) {
+          paddle::imperative::TensorAdd<paddle::experimental::Tensor>(t,
+                                                                      tensor);
+        } else {
+          // TODO(jiabin): Support Other TensorBase later
+          // TODO(zhanlve): Replace SelectedRowsAddTensor with
+          // add_dygraph_function once it's supported
+          paddle::experimental::Tensor new_buffer(
+              std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
+          paddle::imperative::SelectedRowsAddTensor(*tensor, t, &new_buffer);
+          tensor->set_impl(new_buffer.impl());
+        }
       } else {
         // TODO(jiabin): Support Other TensorBase later
         // TODO(zhanlve): Replace SelectedRowsAddTensor with
-        // add_dygraph_function once it's supported
-        paddle::experimental::Tensor new_buffer(
-            std::make_shared<phi::DenseTensor>(), "tmp_accumulator");
-        paddle::imperative::SelectedRowsAddTensor(*tensor, t, &new_buffer);
-        tensor->set_impl(new_buffer.impl());
-      }
-    } else {
-      // TODO(jiabin): Support Other TensorBase later
-      // TODO(zhanlve): Replace SelectedRowsAddTensor with add_dygraph_function
-      // once it's supported
-      if (tensor->is_dense_tensor()) {
-        paddle::imperative::SelectedRowsAddToTensor(t, tensor);
-      } else {
-        *tensor = std::move(*paddle::imperative::SelectedRowsMerge<
-            paddle::experimental::Tensor>(t, *tensor));
+        // add_dygraph_function
+        // once it's supported
+        if (tensor->is_dense_tensor()) {
+          paddle::imperative::SelectedRowsAddToTensor(t, tensor);
+        } else {
+          *tensor = std::move(*paddle::imperative::SelectedRowsMerge<
+              paddle::experimental::Tensor>(t, *tensor));
+        }
       }
     }
   }
@@ -91,7 +98,8 @@ GradNodeAccumulation::operator()(
 
   if (!weak_grad_.expired() && !is_new_grad) {
     auto grad = weak_grad_.lock();
-    CopyOrAddTensor(grad.get(), grad_out);
+    CopyOrAddTensor(grad.get(), grad_out, is_fake_empty_);
+    is_fake_empty_ = false;
   }
 
   // Apply Reduce Hooks
diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h
index f37de9c8e88f10e65090353160169a0bc18d137d..6374534578cb862279663ada1768fb53f8c58682 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.h
+++ b/paddle/fluid/eager/accumulation/accumulation_node.h
@@ -64,14 +64,16 @@ class GradNodeAccumulation : public GradNodeBase {
         new GradNodeAccumulation(nullptr));
   }
 
+  void SetFakeEmpty(bool is_fake_empty) { is_fake_empty_ = is_fake_empty; }
+
  private:
+  // TODO(Jiabin): remove this when we make our clear gradient really cleared;
+  bool is_fake_empty_ = {false};
   std::weak_ptr<paddle::experimental::Tensor> weak_grad_;
-
+  std::vector<std::shared_ptr<TensorVoidHook>> reduce_hooks_;
   std::function<paddle::experimental::Tensor(
       const paddle::experimental::Tensor&)>
       retain_grad_hook_;
-
-  std::vector<std::shared_ptr<TensorVoidHook>> reduce_hooks_;
 };
 
 }  // namespace egr
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 1a0838d7f47c627a35f090b8248c12783ff20ac7..b54f4e1416c35a9cac5c5f856ade511a02c9b1f5 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -494,7 +494,8 @@ static PyObject* tensor_clear_gradient(TensorObject* self, PyObject* args,
   }
 
   paddle::experimental::Tensor* grad;
-  if (egr::egr_utils_api::IsLeafTensor(self->tensor)) {
+  bool is_leaf = egr::egr_utils_api::IsLeafTensor(self->tensor);
+  if (is_leaf) {
     grad = egr::EagerUtils::mutable_grad(self->tensor);
     PADDLE_ENFORCE(grad != nullptr,
                    paddle::platform::errors::Fatal(
@@ -518,6 +519,11 @@ static PyObject* tensor_clear_gradient(TensorObject* self, PyObject* args,
   if (grad->initialized()) {
     if (set_to_zero) {
       grad->set_impl(paddle::experimental::zeros_like(*grad).impl());
+      if (is_leaf) {
+        std::static_pointer_cast<egr::GradNodeAccumulation>(
+            egr::EagerUtils::grad_node(self->tensor))
+            ->SetFakeEmpty(true);
+      }
     } else {
       VLOG(4) << "Gradient of " << self->tensor.name()
               << " is initialized, will be released.";
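
Taken together, the three files make clear_gradient(set_to_zero=True) mark a leaf tensor's GradNodeAccumulation as fake-empty, so that the next backward pass copies the incoming gradient over the zero-filled buffer in CopyOrAddTensor instead of accumulating into it, and the flag is reset after that first accumulation. A minimal Python sketch of the code path being exercised (a sketch only, assuming the public dygraph API; the gradient values in the comments are illustrative):

    import paddle

    x = paddle.to_tensor([1.0, 2.0], stop_gradient=False)
    (x * x).sum().backward()
    # x.grad now holds a dense gradient, e.g. [2., 4.]

    # set_to_zero=True keeps the gradient buffer and fills it with zeros;
    # with this patch, tensor_clear_gradient() also calls SetFakeEmpty(true)
    # on the leaf tensor's GradNodeAccumulation.
    x.clear_gradient(True)

    (3 * x).sum().backward()
    # CopyOrAddTensor sees is_fake_empty == true, so grad_out is copied into
    # x.grad rather than added onto the stale zero buffer. x.grad is [3., 3.]
    # either way; the flag only restores "cleared" semantics until
    # clear_gradient really releases the buffer (see the TODO in
    # accumulation_node.h).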