diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index 84ee1fbe5df96abc0c47b66a34a6e84e1f9be2b6..75659298ea764d21f4d289cb980f5401171cf1da 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -49,11 +49,17 @@ void BasicEngine::Init(
           "the size of tensors is %s, but the size of grad_tensors is %s.",
           tensors.size(), grad_tensors.size()));
 
+  PADDLE_ENFORCE_EQ(accumulators_.empty(), true,
+                    platform::errors::AlreadyExists(
+                        "Accumulators are not empty before preparing it for "
+                        "backward network execution."));
+
   for (size_t i = 0; i < tensors.size(); ++i) {
     auto var = tensors[i];
     auto grad_tensor = grad_tensors[i];
 
     auto init_node = var->GradVarBase()->GradNode();
+
     PADDLE_ENFORCE_EQ(
         var->GradVarBase()->GraphIsFreed(), false,
         platform::errors::Unavailable(
@@ -101,6 +107,16 @@ void BasicEngine::Init(
                                                      *dev_ctx, grad_var);
     }
 
+    VariableWrapper* init_grad_var = var->GradVarBase()->SharedVar().get();
+    auto& accumulator = accumulators_[init_grad_var];
+    if (!accumulator) {
+      if (FLAGS_sort_sum_gradient) {
+        accumulator.reset(new SortedGradientAccumulator(init_grad_var));
+      } else {
+        accumulator.reset(new EagerGradientAccumulator(init_grad_var));
+      }
+    }
+
     init_nodes_.push_back(init_node);
   }
 }
@@ -237,10 +253,6 @@ void BasicEngine::PrepareDeps() {
       node_deps_.empty(), true,
       platform::errors::AlreadyExists("Op deps are not empty before preparing "
                                       "it for backward network execution."));
-  PADDLE_ENFORCE_EQ(accumulators_.empty(), true,
-                    platform::errors::AlreadyExists(
-                        "Accumulators are not empty before preparing it for "
-                        "backward network execution."));
   PADDLE_ENFORCE_EQ(accumulators_with_grad_node_.empty(), true,
                     platform::errors::AlreadyExists(
                         "Accumulators with grad_node as the key are not empty "
@@ -311,7 +323,9 @@ void BasicEngine::Execute() {
   // Start execute Computation graph
   std::queue<std::shared_ptr<GradOpNode>> q;
   for (size_t i = 0; i < init_nodes_.size(); ++i) {
-    q.push(std::move(init_nodes_[i]));
+    if (node_deps_[init_nodes_[i].get()] == 0) {
+      q.push(std::move(init_nodes_[i]));
+    }
   }
 
   size_t op_num = 0;
diff --git a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
index 623b7e68b3f7f722361ee83f4477632bd4502d72..bc280a01890d4a54f76026ccee31666c5f0ff2a8 100644
--- a/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
+++ b/python/paddle/fluid/tests/unittests/test_custom_grad_input.py
@@ -115,6 +115,31 @@ class TestBackwardAPI(unittest.TestCase):
 
         self.assertTrue(np.allclose(x_grad, x_tensor.grad.numpy()))
 
+    def test_backward_accumulator_with_init_grad(self):
+        for dtype in self._dtypes:
+            x = np.random.random([10, ]).astype(dtype)
+            y_grad = np.random.random([10, ]).astype(dtype)
+            z_grad = np.random.random([10, ]).astype(dtype)
+            self._places = [paddle.CPUPlace()]
+            for place in self._places:
+                with dg.guard(place):
+                    x_tensor = paddle.to_tensor(x, stop_gradient=False)
+                    y_tensor = x_tensor**2
+                    z_tensor = y_tensor**3
+
+                    y_grad_tensor = paddle.to_tensor(y_grad)
+                    z_grad_tensor = paddle.to_tensor(z_grad)
+                    paddle.autograd.backward([y_tensor, z_tensor],
+                                             [y_grad_tensor, z_grad_tensor])
+
+                    y = x**2
+                    z = x**3
+                    x_grad = 2 * x_tensor * (
+                        y_grad_tensor + 3 * y_tensor * y_tensor * z_grad_tensor)
+
+                    self.assertTrue(
+                        np.allclose(x_grad.numpy(), x_tensor.grad.numpy()))
+
 
 if __name__ == '__main__':
     unittest.main()
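
For context, the new unit test exercises paddle.autograd.backward() on two outputs of the same graph with user-supplied initial gradients, which is exactly the case the basic_engine.cc changes handle: the accumulator for the shared root gradient is now created in Init() so both contributions are summed. Below is a minimal standalone sketch of that usage, not part of the patch; it simply re-derives the expected gradient with numpy and assumes Paddle's default dynamic-graph mode.

# Standalone sketch (illustrative only, not part of the patch).
import numpy as np
import paddle

x = paddle.to_tensor(np.random.random([10]).astype('float32'),
                     stop_gradient=False)
y = x**2          # first output
z = y**3          # second output, built on top of y

# User-supplied initial gradients for both outputs.
y_grad = paddle.to_tensor(np.random.random([10]).astype('float32'))
z_grad = paddle.to_tensor(np.random.random([10]).astype('float32'))

# Both initial gradients reach x through y's grad node, so the engine must
# accumulate them on y's grad variable before traversing the graph.
paddle.autograd.backward([y, z], [y_grad, z_grad])

# Analytically: dL/dx = 2x * (y_grad + 3 * y^2 * z_grad), with y = x^2.
expected = 2 * x.numpy() * (y_grad.numpy() +
                            3 * (x.numpy()**2)**2 * z_grad.numpy())
assert np.allclose(expected, x.grad.numpy())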