Unverified commit b6d1d890, authored by chengduo, committed by GitHub

Increase num_iteration_per_drop_scope (#19075)

* increase num_iteration_per_drop_scope
test=develop

* Fix bug of while_op
test=develop

* fix bug of whileOp
test=develop
Parent 1d0f0431
......
@@ -31,7 +31,7 @@ struct ExecutionStrategy {
   // iterations the framework cleans up a local execution scope.
   // In some models, the value of this parameter has a great
   // influence on the performance(about 15%) of the program.
-  size_t num_iteration_per_drop_scope_{1};
+  size_t num_iteration_per_drop_scope_{100};
   // At present, the kExperimental executor is the fastest in most models.
   ExecutorType type_{kExperimental};
   // This debug option.
......
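The comment in the hunk above explains the motivation: dropping local execution scopes every iteration is costly, so the executor only cleans them up once every `num_iteration_per_drop_scope_` iterations, and the default is raised from 1 to 100. The following is a minimal sketch of that counting idea; only the `num_iteration_per_drop_scope_` field mirrors the diff, while `ScopeDropper` and `ShouldDrop` are hypothetical illustrations, not part of the Paddle codebase.

```cpp
// Sketch of the drop-scope counting idea. Only num_iteration_per_drop_scope_
// mirrors the diff above; ScopeDropper/ShouldDrop are illustrative only.
#include <cstddef>

struct ExecutionStrategy {
  // With the new default, local scopes are kept for 100 iterations
  // before being cleaned up, instead of being dropped every iteration.
  size_t num_iteration_per_drop_scope_{100};
};

class ScopeDropper {
 public:
  explicit ScopeDropper(const ExecutionStrategy &strategy)
      : strategy_(strategy) {}

  // Called once per executor iteration; returns true when the accumulated
  // local scopes should be dropped and rebuilt.
  bool ShouldDrop() {
    ++iteration_;
    if (iteration_ >= strategy_.num_iteration_per_drop_scope_) {
      iteration_ = 0;
      return true;
    }
    return false;
  }

 private:
  ExecutionStrategy strategy_;
  size_t iteration_{0};
};
```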
......
@@ -62,7 +62,7 @@ class WhileOp : public framework::OperatorBase {
     auto step_scopes =
         scope.FindVar(Output(kStepScopes))->GetMutable<StepScopeVar>();
+    PADDLE_ENFORCE_EQ(step_scopes->size(), 0, "The StepScope should be empty.");
     PADDLE_ENFORCE(platform::is_cpu_place(cond.place()),
                    "Condition of while op must in CPU memory.");
......
@@ -197,17 +197,22 @@ class WhileGradOp : public framework::OperatorBase {
           inside_tensor.set_lod(outside_tensor.lod());
           inside_tensor.ShareDataWith(outside_tensor);
         } else if (og_outside.IsType<framework::LoDTensorArray>()) {
-          auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
+          auto outside_array =
+              og_outside.GetMutable<framework::LoDTensorArray>();
           auto &inside_array =
               detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-          VLOG(8) << outside_og_name << " size = " << outside_array.size();
-          inside_array.resize(outside_array.size());
+          inside_array.clear();
+          inside_array.resize(outside_array->size());
+          VLOG(8) << outside_og_name << " size = " << outside_array->size();
           for (size_t j = 0; j < inside_array.size(); ++j) {
-            VLOG(8) << j << " " << outside_array[j].numel();
-            if (outside_array[j].numel() != 0) {
-              inside_array[j].set_lod(outside_array[j].lod());
-              inside_array[j].ShareDataWith(outside_array[j]);
+            if (!outside_array->at(j).IsInitialized()) {
+              outside_array->at(j).Resize({0});
+            }
+            VLOG(8) << j << " " << outside_array->at(j).numel();
+            if (outside_array->at(j).numel() != 0) {
+              inside_array[j].set_lod(outside_array->at(j).lod());
+              inside_array[j].ShareDataWith(outside_array->at(j));
             } else {
               PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
             }
......
@@ -300,6 +305,7 @@ class WhileGradOp : public framework::OperatorBase {
       dev_ctx.Wait();
       const_cast<framework::Scope &>(scope).DeleteScope(&cur_scope);
     }
+    step_scopes->clear();
   }
 };
......
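Taken together, the two while_op changes tighten the step-scope lifecycle: WhileOp now asserts that the step-scope vector is empty before it starts pushing per-iteration scopes, and WhileGradOp clears the vector once it has deleted every scope it consumed. Below is a minimal standalone sketch of that push/consume/clear protocol; `Scope`, `StepScopeVar`, and the two functions are simplified stand-ins for the real Paddle classes, which use PADDLE_ENFORCE rather than assert.

```cpp
// Simplified sketch of the step-scope protocol the fix enforces.
#include <cassert>
#include <vector>

struct Scope { /* per-iteration state would live here */ };

using StepScopeVar = std::vector<Scope *>;

// Forward pass: must start from an empty step-scope list, then pushes one
// scope per loop iteration for the backward pass to reuse.
void RunWhileForward(StepScopeVar *step_scopes, int num_iterations) {
  assert(step_scopes->empty() && "The StepScope should be empty.");
  for (int i = 0; i < num_iterations; ++i) {
    step_scopes->push_back(new Scope);
  }
}

// Backward pass: walks the scopes in reverse, deletes each one, and then
// clears the vector so the next forward pass sees an empty list again.
void RunWhileBackward(StepScopeVar *step_scopes) {
  for (auto it = step_scopes->rbegin(); it != step_scopes->rend(); ++it) {
    delete *it;
  }
  step_scopes->clear();
}

int main() {
  StepScopeVar step_scopes;
  RunWhileForward(&step_scopes, 3);
  RunWhileBackward(&step_scopes);
  RunWhileForward(&step_scopes, 3);  // OK again: the list was cleared.
  RunWhileBackward(&step_scopes);
  return 0;
}
```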
......
@@ -141,7 +141,7 @@ class SumOp : public framework::OperatorWithKernel {
       for (auto& x_var : x_vars) {
         auto& array = x_var->Get<framework::LoDTensorArray>();
         for (auto& each : array) {
-          if (each.numel() != 0) {
+          if (each.numel() != 0 && each.IsInitialized()) {
             return framework::OpKernelType(each.type(), ctx.device_context(),
                                            layout, library);
           }
......
......
@@ -97,11 +97,11 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
       auto &in_array = in_vars[i]->Get<framework::LoDTensorArray>();
       for (size_t i = 0; i < in_array.size(); ++i) {
-        if (in_array[i].numel() != 0) {
+        if (in_array[i].IsInitialized() && (in_array[i].numel() != 0)) {
           if (i >= out_array.size()) {
             out_array.resize(i + 1);
           }
-          if (out_array[i].numel() == 0) {
+          if (!out_array[i].IsInitialized() || (out_array[i].numel() == 0)) {
             framework::TensorCopy(in_array[i], in_array[i].place(),
                                   context.device_context(), &out_array[i]);
             out_array[i].set_lod(in_array[i].lod());
......
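The sum_op changes follow the same pattern as the while-grad fix: an entry of a LoDTensorArray produced inside a while loop may never have been written, so both kernel-type selection and the element-wise accumulation must treat uninitialized or zero-sized entries as skippable. The self-contained sketch below illustrates that guard with plain standard-library types; `Entry` is a hypothetical stand-in for framework::LoDTensor, not the real Paddle API.

```cpp
// Sketch of summing arrays whose entries may be uninitialized or empty,
// mirroring the IsInitialized()/numel() guards added in the diff.
#include <cstddef>
#include <cstdio>
#include <vector>

struct Entry {
  bool initialized{false};
  std::vector<float> data;  // empty data models numel() == 0

  bool IsInitialized() const { return initialized; }
  std::size_t numel() const { return data.size(); }
};

using EntryArray = std::vector<Entry>;

// Accumulate in_array into out_array, skipping entries that were never
// produced (uninitialized) or that hold no elements. Assumes matching
// entries have matching sizes.
void SumLodTensorArray(const EntryArray &in_array, EntryArray *out_array) {
  for (std::size_t i = 0; i < in_array.size(); ++i) {
    if (!in_array[i].IsInitialized() || in_array[i].numel() == 0) continue;
    if (i >= out_array->size()) out_array->resize(i + 1);

    Entry &out = (*out_array)[i];
    if (!out.IsInitialized() || out.numel() == 0) {
      out = in_array[i];  // first contribution: just copy
    } else {
      for (std::size_t k = 0; k < out.numel(); ++k) {
        out.data[k] += in_array[i].data[k];
      }
    }
  }
}

int main() {
  EntryArray a(3), b;
  a[1] = Entry{true, {1.f, 2.f}};  // only slot 1 was ever written
  SumLodTensorArray(a, &b);
  SumLodTensorArray(a, &b);        // second pass accumulates into slot 1
  std::printf("%zu %f\n", b.size(), b[1].data[0]);  // prints "2 2.000000"
  return 0;
}
```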