diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h
index 68de1580e20a0221b9c9855c50849369eaaff871..b44e6b6a75a6f0375fe0c3e1eb47c5e4e6456d68 100644
--- a/paddle/fluid/framework/details/execution_strategy.h
+++ b/paddle/fluid/framework/details/execution_strategy.h
@@ -31,7 +31,7 @@ struct ExecutionStrategy {
   // iterations the framework cleans up a local execution scope.
   // In some models, the value of this parameter has a great
   // influence on the performance(about 15%) of the program.
-  size_t num_iteration_per_drop_scope_{1};
+  size_t num_iteration_per_drop_scope_{100};
   // At present, the kExperimental executor is the fastest in most models.
   ExecutorType type_{kExperimental};
   // This debug option.
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index b3219208825cd1aea4c869064ff8f5fa8d3300fd..88ccbb51b4ee7140621714a177a6689d96e97bef 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -62,7 +62,7 @@ class WhileOp : public framework::OperatorBase {
     auto step_scopes =
         scope.FindVar(Output(kStepScopes))->GetMutable<StepScopeVar>();
-
+    PADDLE_ENFORCE_EQ(step_scopes->size(), 0, "The StepScope should be empty.");
     PADDLE_ENFORCE(platform::is_cpu_place(cond.place()),
                    "Condition of while op must in CPU memory.");
@@ -197,17 +197,22 @@ class WhileGradOp : public framework::OperatorBase {
               inside_tensor.set_lod(outside_tensor.lod());
               inside_tensor.ShareDataWith(outside_tensor);
             } else if (og_outside.IsType<framework::LoDTensorArray>()) {
-              auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
+              auto outside_array =
+                  og_outside.GetMutable<framework::LoDTensorArray>();
               auto &inside_array =
                   detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-              VLOG(8) << outside_og_name << " size = " << outside_array.size();
-              inside_array.resize(outside_array.size());
+              inside_array.clear();
+              inside_array.resize(outside_array->size());
+              VLOG(8) << outside_og_name << " size = " << outside_array->size();
               for (size_t j = 0; j < inside_array.size(); ++j) {
-                VLOG(8) << j << " " << outside_array[j].numel();
-                if (outside_array[j].numel() != 0) {
-                  inside_array[j].set_lod(outside_array[j].lod());
-                  inside_array[j].ShareDataWith(outside_array[j]);
+                if (!outside_array->at(j).IsInitialized()) {
+                  outside_array->at(j).Resize({0});
+                }
+                VLOG(8) << j << " " << outside_array->at(j).numel();
+                if (outside_array->at(j).numel() != 0) {
+                  inside_array[j].set_lod(outside_array->at(j).lod());
+                  inside_array[j].ShareDataWith(outside_array->at(j));
                 } else {
                   PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
                 }
@@ -300,6 +305,7 @@ class WhileGradOp : public framework::OperatorBase {
       dev_ctx.Wait();
       const_cast<framework::Scope &>(scope).DeleteScope(&cur_scope);
     }
+    step_scopes->clear();
   }
 };
diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc
index e6c8772642573f1a4f331e8f33a77b34de7646fe..3c64ebe995081f8c8f971173d3948371f2837fa9 100644
--- a/paddle/fluid/operators/sum_op.cc
+++ b/paddle/fluid/operators/sum_op.cc
@@ -141,7 +141,7 @@ class SumOp : public framework::OperatorWithKernel {
       for (auto& x_var : x_vars) {
         auto& array = x_var->Get<framework::LoDTensorArray>();
         for (auto& each : array) {
-          if (each.numel() != 0) {
+          if (each.numel() != 0 && each.IsInitialized()) {
             return framework::OpKernelType(each.type(), ctx.device_context(),
                                            layout, library);
           }
diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h
index 7a3fecace45e053bda736133e8d8a95060074fb8..1f9f9486145d1493b0beb49547b81e1b4b6d5107 100644
--- a/paddle/fluid/operators/sum_op.h
+++ b/paddle/fluid/operators/sum_op.h
@@ -97,11 +97,11 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
     auto &in_array = in_vars[i]->Get<framework::LoDTensorArray>();

     for (size_t i = 0; i < in_array.size(); ++i) {
-      if (in_array[i].numel() != 0) {
+      if (in_array[i].IsInitialized() && (in_array[i].numel() != 0)) {
         if (i >= out_array.size()) {
           out_array.resize(i + 1);
         }
-        if (out_array[i].numel() == 0) {
+        if (!out_array[i].IsInitialized() || (out_array[i].numel() == 0)) {
           framework::TensorCopy(in_array[i], in_array[i].place(),
                                 context.device_context(), &out_array[i]);
           out_array[i].set_lod(in_array[i].lod());
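
The recurring pattern in the sum_op changes is to test `IsInitialized()` before trusting `numel()`: after the while_op change above, a `LoDTensorArray` slot may now hold a tensor that was never allocated, and such a slot must be skipped rather than read. Below is a minimal standalone sketch of that guard in plain C++, not Paddle's API; `Slot` and `ShouldAccumulate` are hypothetical stand-ins for `LoDTensor` and the in-kernel check.

```cpp
#include <cstdio>
#include <vector>

// Hypothetical stand-in for a LoDTensor slot inside a LoDTensorArray.
struct Slot {
  bool initialized{false};  // stands in for Tensor::IsInitialized()
  long numel{0};            // stands in for Tensor::numel()
};

// Mirrors the guard `in_array[i].IsInitialized() && in_array[i].numel() != 0`:
// only slots that are both allocated and non-empty are safe to accumulate.
bool ShouldAccumulate(const Slot &s) { return s.initialized && s.numel != 0; }

int main() {
  std::vector<Slot> array{{false, 0}, {true, 0}, {true, 8}};
  for (size_t i = 0; i < array.size(); ++i) {
    std::printf("slot %zu: %s\n", i,
                ShouldAccumulate(array[i]) ? "accumulate" : "skip");
  }
  return 0;
}
```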
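The while_op changes pair with the new `num_iteration_per_drop_scope_{100}` default: once the executor stops dropping the local scope every iteration, `step_scopes` survives across runs, so the forward op must start from an empty list and the gradient op must clear it when done. A standalone sketch of that lifecycle, assuming simplified `Scope`, `RunWhile`, and `RunWhileGrad` that are hypothetical, not Paddle's interfaces:

```cpp
#include <cassert>
#include <vector>

struct Scope {};  // hypothetical stand-in for framework::Scope

// Forward pass: mirrors the new PADDLE_ENFORCE_EQ in WhileOp, which
// requires the step-scope list to be empty before recording iterations.
void RunWhile(std::vector<Scope *> *step_scopes) {
  assert(step_scopes->empty() && "The StepScope should be empty.");
  step_scopes->push_back(new Scope);  // one scope per loop iteration
}

// Backward pass: mirrors the `step_scopes->clear()` added to WhileGradOp,
// so leftover scopes never leak into the next forward run.
void RunWhileGrad(std::vector<Scope *> *step_scopes) {
  for (Scope *s : *step_scopes) delete s;
  step_scopes->clear();
}

int main() {
  std::vector<Scope *> step_scopes;
  // The same list is reused across iterations, as it would be when the
  // executor keeps the local scope alive for many iterations.
  for (int iter = 0; iter < 3; ++iter) {
    RunWhile(&step_scopes);
    RunWhileGrad(&step_scopes);
  }
  return 0;
}
```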