diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index 84ee1fbe5df96abc0c47b66a34a6e84e1f9be2b6..75659298ea764d21f4d289cb980f5401171cf1da 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -49,11 +49,17 @@ void BasicEngine::Init(
           "the size of tensors is %s, but the size of grad_tensors is %s.",
           tensors.size(), grad_tensors.size()));
 
+  PADDLE_ENFORCE_EQ(accumulators_.empty(), true,
+                    platform::errors::AlreadyExists(
+                        "Accumulators are not empty before preparing it for "
+                        "backward network execution."));
+
   for (size_t i = 0; i < tensors.size(); ++i) {
     auto var = tensors[i];
     auto grad_tensor = grad_tensors[i];
 
     auto init_node = var->GradVarBase()->GradNode();
+
     PADDLE_ENFORCE_EQ(
         var->GradVarBase()->GraphIsFreed(), false,
         platform::errors::Unavailable(
@@ -101,6 +107,16 @@ void BasicEngine::Init(
                                                      *dev_ctx, grad_var);
     }
 
+    VariableWrapper* init_grad_var = var->GradVarBase()->SharedVar().get();
+    auto& accumulator = accumulators_[init_grad_var];
+    if (!accumulator) {
+      if (FLAGS_sort_sum_gradient) {
+        accumulator.reset(new SortedGradientAccumulator(init_grad_var));
+      } else {
+        accumulator.reset(new EagerGradientAccumulator(init_grad_var));
+      }
+    }
+
     init_nodes_.push_back(init_node);
   }
 }
@@ -237,10 +253,6 @@ void BasicEngine::PrepareDeps() {
       node_deps_.empty(), true,
       platform::errors::AlreadyExists("Op deps are not empty before preparing "
                                       "it for backward network execution."));
-  PADDLE_ENFORCE_EQ(accumulators_.empty(), true,
-                    platform::errors::AlreadyExists(
-                        "Accumulators are not empty before preparing it for "
-                        "backward network execution."));
   PADDLE_ENFORCE_EQ(accumulators_with_grad_node_.empty(), true,
                     platform::errors::AlreadyExists(
                         "Accumulators with grad_node as the key are not empty "
@@ -311,7 +323,9 @@ void BasicEngine::Execute() {
   // Start execute Computation graph
   std::queue<std::shared_ptr<GradOpNode>> q;
   for (size_t i = 0; i < init_nodes_.size(); ++i) {
-    q.push(std::move(init_nodes_[i]));
+    if (node_deps_[init_nodes_[i].get()] == 0) {
+      q.push(std::move(init_nodes_[i]));
+    }
   }
 
   size_t op_num = 0;
diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
index 623b7e68b3f7f722361ee83f4477632bd4502d72..bc280a01890d4a54f76026ccee31666c5f0ff2a8 100644
--- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
+++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
@@ -115,6 +115,31 @@ class TestBackwardAPI(unittest.TestCase):
 
         self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
 
+    def test_backward_accumulator_with_init_grad(self):
+        for dtype in self._dtypes:
+            x = np.random.random([10, ]).astype(dtype)
+            y_grad = np.random.random([10, ]).astype(dtype)
+            z_grad = np.random.random([10, ]).astype(dtype)
+            self._places = [paddle.CPUPlace()]
+            for place in self._places:
+                with dg.guard(place):
+                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
+                    y_tensor = x_tensor**2
+                    z_tensor = y_tensor**3
+
+                    y_grad_tensor = paddle.to_tensor(y_grad)
+                    z_grad_tensor = paddle.to_tensor(z_grad)
+                    paddle.autograd.backward([y_tensor, z_tensor],
+                                             [y_grad_tensor, z_grad_tensor])
+
+                    y = x**2
+                    z = x**3
+                    x_grad = 2 * x_tensor * (
+                        y_grad_tensor + 3 * y_tensor * y_tensor * z_grad_tensor)
+
+                    self.assertTrue(
+                        np.allclose(x_grad.numpy(), x_tensor.grad.numpy()))
+
 
 if __name__ == '__main__':
     unittest.main()
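
For context, the new unit test exercises paddle.autograd.backward() on two outputs of the same graph with user-supplied initial gradients, which is exactly the case the basic_engine.cc changes handle: the accumulator for the shared root gradient is now created in Init() so both contributions are summed. Below is a minimal standalone sketch of that usage, not part of the patch; it simply re-derives the expected gradient with numpy and assumes Paddle's default dynamic-graph mode.

# Standalone sketch (illustrative only, not part of the patch).
import numpy as np
import paddle

x = paddle.to_tensor(np.random.random([10]).astype('float32'),
                     stop_gradient=False)
y = x**2          # first output
z = y**3          # second output, built on top of y

# User-supplied initial gradients for both outputs.
y_grad = paddle.to_tensor(np.random.random([10]).astype('float32'))
z_grad = paddle.to_tensor(np.random.random([10]).astype('float32'))

# Both initial gradients reach x through y's grad node, so the engine must
# accumulate them on y's grad variable before traversing the graph.
paddle.autograd.backward([y, z], [y_grad, z_grad])

# Analytically: dL/dx = 2x * (y_grad + 3 * y^2 * z_grad), with y = x^2.
expected = 2 * x.numpy() * (y_grad.numpy() +
                            3 * (x.numpy()**2)**2 * z_grad.numpy())
assert np.allclose(expected, x.grad.numpy())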