diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h
index 68de1580e20a0221b9c9855c50849369eaaff871..b44e6b6a75a6f0375fe0c3e1eb47c5e4e6456d68 100644
--- a/paddle/fluid/framework/details/execution_strategy.h
+++ b/paddle/fluid/framework/details/execution_strategy.h
@@ -31,7 +31,7 @@ struct ExecutionStrategy {
   // iterations the framework cleans up a local execution scope.
   // In some models, the value of this parameter has a great
   // influence on the performance(about 15%) of the program.
-  size_t num_iteration_per_drop_scope_{1};
+  size_t num_iteration_per_drop_scope_{100};
   // At present, the kExperimental executor is the fastest in most models.
   ExecutorType type_{kExperimental};
   // This debug option.
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index b3219208825cd1aea4c869064ff8f5fa8d3300fd..88ccbb51b4ee7140621714a177a6689d96e97bef 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -62,7 +62,7 @@ class WhileOp : public framework::OperatorBase {
     auto step_scopes =
         scope.FindVar(Output(kStepScopes))->GetMutable<StepScopeVar>();
-
+    PADDLE_ENFORCE_EQ(step_scopes->size(), 0, "The StepScope should be empty.");
     PADDLE_ENFORCE(platform::is_cpu_place(cond.place()),
                    "Condition of while op must in CPU memory.");
@@ -197,17 +197,22 @@ class WhileGradOp : public framework::OperatorBase {
               inside_tensor.set_lod(outside_tensor.lod());
               inside_tensor.ShareDataWith(outside_tensor);
             } else if (og_outside.IsType<framework::LoDTensorArray>()) {
-              auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
+              auto outside_array =
+                  og_outside.GetMutable<framework::LoDTensorArray>();
               auto &inside_array =
                   detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-              VLOG(8) << outside_og_name << " size = " << outside_array.size();
-              inside_array.resize(outside_array.size());
+              inside_array.clear();
+              inside_array.resize(outside_array->size());
+              VLOG(8) << outside_og_name << " size = " << outside_array->size();
               for (size_t j = 0; j < inside_array.size(); ++j) {
-                VLOG(8) << j << " " << outside_array[j].numel();
-                if (outside_array[j].numel() != 0) {
-                  inside_array[j].set_lod(outside_array[j].lod());
-                  inside_array[j].ShareDataWith(outside_array[j]);
+                if (!outside_array->at(j).IsInitialized()) {
+                  outside_array->at(j).Resize({0});
+                }
+                VLOG(8) << j << " " << outside_array->at(j).numel();
+                if (outside_array->at(j).numel() != 0) {
+                  inside_array[j].set_lod(outside_array->at(j).lod());
+                  inside_array[j].ShareDataWith(outside_array->at(j));
                 } else {
                   PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
                 }
@@ -300,6 +305,7 @@ class WhileGradOp : public framework::OperatorBase {
       dev_ctx.Wait();
       const_cast<framework::Scope &>(scope).DeleteScope(&cur_scope);
     }
+    step_scopes->clear();
   }
 };
diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc
index e6c8772642573f1a4f331e8f33a77b34de7646fe..3c64ebe995081f8c8f971173d3948371f2837fa9 100644
--- a/paddle/fluid/operators/sum_op.cc
+++ b/paddle/fluid/operators/sum_op.cc
@@ -141,7 +141,7 @@ class SumOp : public framework::OperatorWithKernel {
       for (auto& x_var : x_vars) {
         auto& array = x_var->Get<framework::LoDTensorArray>();
         for (auto& each : array) {
-          if (each.numel() != 0) {
+          if (each.numel() != 0 && each.IsInitialized()) {
             return framework::OpKernelType(each.type(), ctx.device_context(),
                                            layout, library);
           }
diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h
index 7a3fecace45e053bda736133e8d8a95060074fb8..1f9f9486145d1493b0beb49547b81e1b4b6d5107 100644
--- a/paddle/fluid/operators/sum_op.h
+++ b/paddle/fluid/operators/sum_op.h
@@ -97,11 +97,11 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
     auto &in_array = in_vars[i]->Get<framework::LoDTensorArray>();

     for (size_t i = 0; i < in_array.size(); ++i) {
-      if (in_array[i].numel() != 0) {
+      if (in_array[i].IsInitialized() && (in_array[i].numel() != 0)) {
         if (i >= out_array.size()) {
           out_array.resize(i + 1);
         }
-        if (out_array[i].numel() == 0) {
+        if (!out_array[i].IsInitialized() || (out_array[i].numel() == 0)) {
           framework::TensorCopy(in_array[i], in_array[i].place(),
                                 context.device_context(), &out_array[i]);
           out_array[i].set_lod(in_array[i].lod());
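
The recurring pattern in the sum_op changes is to test `IsInitialized()` before trusting `numel()`: after the while_op change above, a `LoDTensorArray` slot may now hold a tensor that was never allocated, and such a slot must be skipped rather than read. Below is a minimal standalone sketch of that guard in plain C++, not Paddle's API; `Slot` and `ShouldAccumulate` are hypothetical stand-ins for `LoDTensor` and the in-kernel check.

```cpp
#include <cstdio>
#include <vector>

// Hypothetical stand-in for a LoDTensor slot inside a LoDTensorArray.
struct Slot {
  bool initialized{false};  // stands in for Tensor::IsInitialized()
  long numel{0};            // stands in for Tensor::numel()
};

// Mirrors the guard `in_array[i].IsInitialized() && in_array[i].numel() != 0`:
// only slots that are both allocated and non-empty are safe to accumulate.
bool ShouldAccumulate(const Slot &s) { return s.initialized && s.numel != 0; }

int main() {
  std::vector<Slot> array{{false, 0}, {true, 0}, {true, 8}};
  for (size_t i = 0; i < array.size(); ++i) {
    std::printf("slot %zu: %s\n", i,
                ShouldAccumulate(array[i]) ? "accumulate" : "skip");
  }
  return 0;
}
```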
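The while_op changes pair with the new `num_iteration_per_drop_scope_{100}` default: once the executor stops dropping the local scope every iteration, `step_scopes` survives across runs, so the forward op must start from an empty list and the gradient op must clear it when done. A standalone sketch of that lifecycle, assuming simplified `Scope`, `RunWhile`, and `RunWhileGrad` that are hypothetical, not Paddle's interfaces:

```cpp
#include <cassert>
#include <vector>

struct Scope {};  // hypothetical stand-in for framework::Scope

// Forward pass: mirrors the new PADDLE_ENFORCE_EQ in WhileOp, which
// requires the step-scope list to be empty before recording iterations.
void RunWhile(std::vector<Scope *> *step_scopes) {
  assert(step_scopes->empty() && "The StepScope should be empty.");
  step_scopes->push_back(new Scope);  // one scope per loop iteration
}

// Backward pass: mirrors the `step_scopes->clear()` added to WhileGradOp,
// so leftover scopes never leak into the next forward run.
void RunWhileGrad(std::vector<Scope *> *step_scopes) {
  for (Scope *s : *step_scopes) delete s;
  step_scopes->clear();
}

int main() {
  std::vector<Scope *> step_scopes;
  // The same list is reused across iterations, as it would be when the
  // executor keeps the local scope alive for many iterations.
  for (int iter = 0; iter < 3; ++iter) {
    RunWhile(&step_scopes);
    RunWhileGrad(&step_scopes);
  }
  return 0;
}
```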