diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
index 1e48f75958a3ada4d1cd5c8d0f920da4fed2157e..e587210b357ea6caa3272903d8aa6b3e4b2e8228 100644
--- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
@@ -73,8 +73,9 @@ void NCCLAllReduceOpHandle::RunImpl() {
 
     for (size_t i = 0; i < local_scopes_.size(); ++i) {
       auto *s = local_scopes_[i];
+      auto &local_scope = *s->FindVar(kLocalExecScopeName)->Get<Scope *>();
-      auto &lod_tensor = s->FindVar(var_name)->Get<LoDTensor>();
+      auto &lod_tensor = local_scope.FindVar(var_name)->Get<LoDTensor>();
       lod_tensors.emplace_back(lod_tensor);
     }
 
@@ -110,17 +111,21 @@ void NCCLAllReduceOpHandle::RunImpl() {
         }
       });
     } else {  // Special handle CPU only Operator's gradient. Like CRF
-      auto &trg =
-          *this->local_scopes_[0]->Var()->GetMutable<framework::LoDTensor>();
+      auto &trg = *this->local_scopes_[0]
+                       ->FindVar(kLocalExecScopeName)
+                       ->Get<Scope *>()
+                       ->Var()
+                       ->GetMutable<framework::LoDTensor>();
 
       // Reduce All Tensor to trg in CPU
       ReduceLoDTensor func(lod_tensors, &trg);
       VisitDataType(ToDataType(lod_tensors[0].type()), func);
 
       for (size_t i = 0; i < local_scopes_.size(); ++i) {
-        auto &scope = local_scopes_[i];
+        auto &scope =
+            *local_scopes_[i]->FindVar(kLocalExecScopeName)->Get<Scope *>();
         auto &p = places_[i];
-        auto *var = scope->FindVar(var_name);
+        auto *var = scope.FindVar(var_name);
         auto *dev_ctx = dev_ctxes_[p];
 
         RunAndRecordEvent(p, [&trg, var, dev_ctx, p] {
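
All of the hunks above apply the same fix: variables are no longer read straight from each entry in local_scopes_, but from the per-execution scope that is registered inside it under kLocalExecScopeName. The standalone C++ sketch below illustrates only that lookup pattern; the types and helper names (Scope, FindInTopScope, FindInExecScope) are simplified stand-ins for illustration, not the Paddle API.

// Minimal sketch of the scope-resolution pattern used in the diff above.
// Assumption: each top-level scope stores a pointer to its execution scope
// under a well-known key, here called kLocalExecScopeName.
#include <cassert>
#include <string>
#include <unordered_map>

struct Scope {
  // Variables are kept as opaque pointers for brevity; the real framework
  // stores typed Variable objects.
  std::unordered_map<std::string, void *> vars;
  void *FindVar(const std::string &name) const {
    auto it = vars.find(name);
    return it == vars.end() ? nullptr : it->second;
  }
};

static const char kLocalExecScopeName[] = "@LOCAL_EXEC_SCOPE@";

// Old behaviour: look the variable up in the top-level scope directly.
void *FindInTopScope(Scope *s, const std::string &var_name) {
  return s->FindVar(var_name);
}

// New behaviour: resolve the execution scope first, then look the variable up there.
void *FindInExecScope(Scope *s, const std::string &var_name) {
  auto *exec_scope = static_cast<Scope *>(s->FindVar(kLocalExecScopeName));
  assert(exec_scope != nullptr && "execution scope must be registered");
  return exec_scope->FindVar(var_name);
}

int main() {
  Scope exec_scope;
  int grad = 42;
  exec_scope.vars["x@GRAD"] = &grad;  // the gradient lives in the exec scope

  Scope top_scope;
  top_scope.vars[kLocalExecScopeName] = &exec_scope;

  // The old lookup misses the variable; the new one finds it.
  assert(FindInTopScope(&top_scope, "x@GRAD") == nullptr);
  assert(FindInExecScope(&top_scope, "x@GRAD") == &grad);
  return 0;
}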