From b6d1d8901fe267cc7661935b073d4a496b5db6dd Mon Sep 17 00:00:00 2001
From: chengduo <30176695+chengduoZH@users.noreply.github.com>
Date: Thu, 29 Aug 2019 06:18:54 +0800
Subject: [PATCH] Increase num_iteration_per_drop_scope (#19075)

* increase num_iteration_per_drop_scope
test=develop

* Fix bug of while_op
test=develop

* fix bug of whileOp
test=develop
---
 .../framework/details/execution_strategy.h  |  2 +-
 .../fluid/operators/controlflow/while_op.cc | 22 ++++++++++++-------
 paddle/fluid/operators/sum_op.cc            |  2 +-
 paddle/fluid/operators/sum_op.h             |  4 ++--
 4 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h
index 68de1580e20..b44e6b6a75a 100644
--- a/paddle/fluid/framework/details/execution_strategy.h
+++ b/paddle/fluid/framework/details/execution_strategy.h
@@ -31,7 +31,7 @@ struct ExecutionStrategy {
   // iterations the framework cleans up a local execution scope.
   // In some models, the value of this parameter has a great
   // influence on the performance(about 15%) of the program.
-  size_t num_iteration_per_drop_scope_{1};
+  size_t num_iteration_per_drop_scope_{100};
   // At present, the kExperimental executor is the fastest in most models.
   ExecutorType type_{kExperimental};
   // This debug option.
diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc
index b3219208825..88ccbb51b4e 100644
--- a/paddle/fluid/operators/controlflow/while_op.cc
+++ b/paddle/fluid/operators/controlflow/while_op.cc
@@ -62,7 +62,7 @@ class WhileOp : public framework::OperatorBase {
     auto step_scopes =
         scope.FindVar(Output(kStepScopes))->GetMutable<StepScopeVar>();
-
+    PADDLE_ENFORCE_EQ(step_scopes->size(), 0, "The StepScope should be empty.");
     PADDLE_ENFORCE(platform::is_cpu_place(cond.place()),
                    "Condition of while op must in CPU memory.");
@@ -197,17 +197,22 @@ class WhileGradOp : public framework::OperatorBase {
           inside_tensor.set_lod(outside_tensor.lod());
           inside_tensor.ShareDataWith(outside_tensor);
         } else if (og_outside.IsType<framework::LoDTensorArray>()) {
-          auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
+          auto outside_array =
+              og_outside.GetMutable<framework::LoDTensorArray>();
           auto &inside_array =
               detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
-          VLOG(8) << outside_og_name << " size = " << outside_array.size();
-          inside_array.resize(outside_array.size());
+          inside_array.clear();
+          inside_array.resize(outside_array->size());
+          VLOG(8) << outside_og_name << " size = " << outside_array->size();
           for (size_t j = 0; j < inside_array.size(); ++j) {
-            VLOG(8) << j << " " << outside_array[j].numel();
-            if (outside_array[j].numel() != 0) {
-              inside_array[j].set_lod(outside_array[j].lod());
-              inside_array[j].ShareDataWith(outside_array[j]);
+            if (!outside_array->at(j).IsInitialized()) {
+              outside_array->at(j).Resize({0});
+            }
+            VLOG(8) << j << " " << outside_array->at(j).numel();
+            if (outside_array->at(j).numel() != 0) {
+              inside_array[j].set_lod(outside_array->at(j).lod());
+              inside_array[j].ShareDataWith(outside_array->at(j));
             } else {
               PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
             }
@@ -300,6 +305,7 @@ class WhileGradOp : public framework::OperatorBase {
       dev_ctx.Wait();
       const_cast<framework::Scope &>(scope).DeleteScope(&cur_scope);
     }
+    step_scopes->clear();
   }
 };
diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc
index e6c87726425..3c64ebe9950 100644
--- a/paddle/fluid/operators/sum_op.cc
+++ b/paddle/fluid/operators/sum_op.cc
@@ -141,7 +141,7 @@ class SumOp : public framework::OperatorWithKernel {
       for (auto& x_var : x_vars) {
         auto& array = x_var->Get<framework::LoDTensorArray>();
         for (auto& each : array) {
-          if (each.numel() != 0) {
+          if (each.numel() != 0 && each.IsInitialized()) {
            return framework::OpKernelType(each.type(), ctx.device_context(),
                                           layout, library);
          }
diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h
index 7a3fecace45..1f9f9486145 100644
--- a/paddle/fluid/operators/sum_op.h
+++ b/paddle/fluid/operators/sum_op.h
@@ -97,11 +97,11 @@ void LodTensorArrayCompute(const framework::ExecutionContext &context) {
       auto &in_array = in_vars[i]->Get<framework::LoDTensorArray>();
 
       for (size_t i = 0; i < in_array.size(); ++i) {
-        if (in_array[i].numel() != 0) {
+        if (in_array[i].IsInitialized() && (in_array[i].numel() != 0)) {
          if (i >= out_array.size()) {
            out_array.resize(i + 1);
          }
-          if (out_array[i].numel() == 0) {
+          if (!out_array[i].IsInitialized() || (out_array[i].numel() == 0)) {
            framework::TensorCopy(in_array[i], in_array[i].place(),
                                  context.device_context(), &out_array[i]);
            out_array[i].set_lod(in_array[i].lod());
--
GitLab
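
A note on the first hunk: the executor keeps a local scope alive across iterations and only runs cleanup every num_iteration_per_drop_scope_ iterations, so raising the default from 1 to 100 replaces a per-iteration teardown with a periodic one. Below is a minimal, self-contained sketch of that cadence; LocalScope and the run loop are hypothetical stand-ins, not the actual ParallelExecutor code.

#include <cstddef>
#include <iostream>

// Stand-in for a local execution scope that accumulates
// intermediate variables until it is dropped.
struct LocalScope {
  std::size_t cached_vars = 0;
  void RunOneIteration() { ++cached_vars; }  // pretend each run caches vars
  void DropKids() { cached_vars = 0; }       // the expensive cleanup pass
};

int main() {
  const std::size_t num_iteration_per_drop_scope = 100;  // was 1 before this patch
  LocalScope scope;
  std::size_t drops = 0;
  for (std::size_t iter = 1; iter <= 1000; ++iter) {
    scope.RunOneIteration();
    // Clean up only every N iterations instead of after every one,
    // trading higher peak memory for far less teardown churn.
    if (iter % num_iteration_per_drop_scope == 0) {
      scope.DropKids();
      ++drops;
    }
  }
  std::cout << "cleanups with N=100: " << drops << " (vs 1000 with N=1)\n";
}

The trade-off is the one the header comment hints at: intermediates stay cached longer in exchange for fewer cleanup passes, which is where the roughly 15% swing comes from in some models.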
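The PADDLE_ENFORCE_EQ in WhileOp and the step_scopes->clear() in WhileGradOp appear to work as a pair: once scopes are dropped only every 100 iterations, the step-scope vector can outlive a single run, so the backward op must leave it empty and the forward op can assert that invariant on entry. A schematic of that handoff, using a hand-rolled Scope stand-in rather than the real framework::Scope API:

#include <cassert>
#include <vector>

struct Scope {};  // stand-in for framework::Scope

// Forward pass: requires the step-scope list to start empty, then
// fills one scope per loop step (mirrors the new enforce in WhileOp).
void RunWhile(std::vector<Scope*>* step_scopes, int steps) {
  assert(step_scopes->empty() && "The StepScope should be empty.");
  for (int i = 0; i < steps; ++i) step_scopes->push_back(new Scope);
}

// Backward pass: consumes the scopes in reverse and, crucially,
// clears the vector so the next forward run sees it empty
// (mirrors the added `step_scopes->clear();` in WhileGradOp).
void RunWhileGrad(std::vector<Scope*>* step_scopes) {
  for (auto it = step_scopes->rbegin(); it != step_scopes->rend(); ++it) {
    delete *it;  // stands in for DeleteScope(&cur_scope)
  }
  step_scopes->clear();
}

int main() {
  std::vector<Scope*> step_scopes;
  for (int run = 0; run < 3; ++run) {  // scope is no longer dropped between runs
    RunWhile(&step_scopes, 10);
    RunWhileGrad(&step_scopes);
  }
  return 0;
}

Without the clear(), the second RunWhile in this sketch would trip the assertion on stale scopes left over from the previous run.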
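The sum_op.cc, sum_op.h, and WhileGradOp hunks all add IsInitialized() guards around numel() checks. The hazard they patch: a tensor slot in a LoDTensorArray can carry nonzero shape metadata (so numel() != 0) while its buffer was never allocated, and sharing or copying such a slot touches memory that does not exist. A toy model of that failure mode, with a hypothetical MiniTensor standing in for framework::LoDTensor:

#include <cassert>
#include <cstdint>
#include <memory>
#include <vector>

// Hypothetical stand-in for framework::LoDTensor: shape metadata can be
// set (Resize) without memory ever being allocated (mutable_data).
struct MiniTensor {
  std::vector<int64_t> dims;
  std::shared_ptr<float> buf;  // null until allocated
  int64_t numel() const {
    int64_t n = 1;
    for (int64_t d : dims) n *= d;
    return dims.empty() ? 0 : n;
  }
  bool IsInitialized() const { return buf != nullptr; }
};

int main() {
  MiniTensor t;
  t.dims = {4, 8};  // shape set, but nothing allocated yet
  // numel() alone reports "non-empty" even though there is no buffer:
  assert(t.numel() == 32 && !t.IsInitialized());
  // The patched checks require both before reading the data, mirroring
  // `in_array[i].IsInitialized() && (in_array[i].numel() != 0)`:
  bool safe_to_share = t.IsInitialized() && t.numel() != 0;
  assert(!safe_to_share);
  return 0;
}

This is also why the WhileGradOp hunk calls Resize({0}) on uninitialized slots first: it forces numel() to 0 so the existing zero-size branch handles them instead of the data-sharing path.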