Unverified commit fbe56247, authored by fengjiayi, committed by GitHub

Merge pull request #9994 from reyoung/feature/debug

Fix bugs in local_scopes
@@ -73,8 +73,9 @@ void NCCLAllReduceOpHandle::RunImpl() {
     for (size_t i = 0; i < local_scopes_.size(); ++i) {
       auto *s = local_scopes_[i];
-      auto &lod_tensor = s->FindVar(var_name)->Get<LoDTensor>();
+      auto &local_scope = *s->FindVar(kLocalExecScopeName)->Get<Scope *>();
+      auto &lod_tensor = local_scope.FindVar(var_name)->Get<LoDTensor>();
       lod_tensors.emplace_back(lod_tensor);
     }
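This hunk (and the NCCLAllReduceOpHandle hunk that follows) moves the variable lookup from the per-device top-level scope to the per-run execution scope that ParallelExecutor::Run publishes under kLocalExecScopeName. Below is a minimal, self-contained sketch of that indirection; the Scope/Variable types, the helper LocalExecScope, and the key string are simplified stand-ins for illustration, not Paddle's real API.

```cpp
// Stand-in sketch (not Paddle's real Scope/Variable API) of the indirection
// the fix relies on: each device's top-level local scope stores a pointer to
// the per-run execution scope under a fixed key, and op handles must resolve
// that pointer before looking variables up.
#include <cassert>
#include <string>
#include <unordered_map>

struct Scope;  // simplified: a Variable carries either a payload or a Scope *
struct Variable {
  float value = 0.f;
  Scope *scope_ptr = nullptr;
};

struct Scope {
  std::unordered_map<std::string, Variable> vars;
  Variable *Var(const std::string &name) { return &vars[name]; }
  Variable *FindVar(const std::string &name) {
    auto it = vars.find(name);
    return it == vars.end() ? nullptr : &it->second;
  }
};

static const char kLocalExecScopeName[] = "@local_exec_scope@";  // placeholder key

// Resolve the per-run execution scope hanging off a device's local scope.
Scope &LocalExecScope(Scope *device_scope) {
  Variable *v = device_scope->FindVar(kLocalExecScopeName);
  assert(v != nullptr && v->scope_ptr != nullptr);
  return *v->scope_ptr;
}

int main() {
  Scope device_scope;  // per-device top-level local scope
  Scope exec_scope;    // per-run execution scope (created by Run())
  device_scope.Var(kLocalExecScopeName)->scope_ptr = &exec_scope;

  exec_scope.Var("x@GRAD")->value = 1.f;  // the gradient lives in the exec scope

  // Buggy lookup: device_scope.FindVar("x@GRAD") would return nullptr here.
  // Fixed lookup: go through kLocalExecScopeName first.
  assert(LocalExecScope(&device_scope).FindVar("x@GRAD") != nullptr);
  return 0;
}
```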
@@ -110,17 +111,21 @@ void NCCLAllReduceOpHandle::RunImpl() {
         }
       });
     } else {  // Special handle CPU only Operator's gradient. Like CRF
-      auto &trg =
-          *this->local_scopes_[0]->Var()->GetMutable<framework::LoDTensor>();
+      auto &trg = *this->local_scopes_[0]
+                       ->FindVar(kLocalExecScopeName)
+                       ->Get<Scope *>()
+                       ->Var()
+                       ->GetMutable<framework::LoDTensor>();
 
       // Reduce All Tensor to trg in CPU
       ReduceLoDTensor func(lod_tensors, &trg);
       VisitDataType(ToDataType(lod_tensors[0].type()), func);
 
       for (size_t i = 0; i < local_scopes_.size(); ++i) {
-        auto &scope = local_scopes_[i];
+        auto &scope =
+            *local_scopes_[i]->FindVar(kLocalExecScopeName)->Get<Scope *>();
         auto &p = places_[i];
-        auto *var = scope->FindVar(var_name);
+        auto *var = scope.FindVar(var_name);
         auto *dev_ctx = dev_ctxes_[p];
 
         RunAndRecordEvent(p, [&trg, var, dev_ctx, p] {
...
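The else branch handles gradients that only exist on the CPU (the CRF case mentioned in the comment): the per-device LoDTensors are reduced into one target tensor, which after the fix is also allocated in an execution scope. The following sketch only illustrates the element-wise summation such a reduction performs, using plain std::vector in place of LoDTensor and ReduceLoDTensor.

```cpp
// Conceptual sketch of the CPU-only reduction: gradients produced per device
// are summed element-wise into a single target buffer before being copied
// back out to every device.
#include <cstddef>
#include <iostream>
#include <vector>

std::vector<float> ReduceToTarget(const std::vector<std::vector<float>> &grads) {
  std::vector<float> trg(grads.front().size(), 0.f);
  for (const auto &g : grads) {
    for (std::size_t i = 0; i < g.size(); ++i) trg[i] += g[i];
  }
  return trg;
}

int main() {
  // One gradient buffer per device / local scope.
  std::vector<std::vector<float>> grads = {{1.f, 2.f}, {3.f, 4.f}, {5.f, 6.f}};
  for (float v : ReduceToTarget(grads)) std::cout << v << ' ';  // prints: 9 12
  std::cout << '\n';
  return 0;
}
```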
@@ -30,10 +30,11 @@ ScaleLossGradOpHandle::~ScaleLossGradOpHandle() {}
 
 void ScaleLossGradOpHandle::RunImpl() {
   std::string var_name = static_cast<VarHandle *>(this->outputs_[0])->name_;
+  auto &local_scope = *scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
 
-  float *tmp =
-      scope_->FindVar(var_name)->GetMutable<LoDTensor>()->mutable_data<float>(
-          make_ddim({1}), place_);
+  float *tmp = local_scope.FindVar(var_name)
+                   ->GetMutable<LoDTensor>()
+                   ->mutable_data<float>(make_ddim({1}), place_);
 
   if (platform::is_cpu_place(place_)) {
     *tmp = coeff_;
...
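ScaleLossGradOpHandle follows the same pattern: the one-element tensor holding the loss-gradient scale (coeff_) is now looked up in the execution scope rather than in scope_ directly. The sketch below is a hedged illustration of that idea only; the helper name, the map-based scope, and the 1/num_devices coefficient are assumptions for illustration, not the real implementation.

```cpp
// Hedged sketch of what ScaleLossGradOpHandle conceptually does: write the
// loss-gradient scaling coefficient (often 1 / number-of-devices) into a
// one-element buffer that lives in the per-run execution scope, so the
// backward pass on every device starts from that scaled gradient.
#include <cstddef>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-ins; the real code uses Scope/LoDTensor and a place.
using Tensor = std::vector<float>;
using ExecScope = std::unordered_map<std::string, Tensor>;

void ScaleLossGrad(ExecScope *local_exec_scope, const std::string &var_name,
                   float coeff) {
  Tensor &t = (*local_exec_scope)[var_name];  // create-or-find, like Var()
  t.assign(1, coeff);                         // a single-element tensor
}

int main() {
  const std::size_t num_devices = 4;
  ExecScope exec_scope;
  ScaleLossGrad(&exec_scope, "loss@GRAD", 1.f / num_devices);
  std::cout << exec_scope["loss@GRAD"][0] << '\n';  // prints: 0.25
  return 0;
}
```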
@@ -63,13 +63,14 @@ ParallelExecutor::ParallelExecutor(
   // Step 1. Bcast the params to devs.
   // Create local scopes
   if (local_scopes.empty()) {
-    for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.push_back(&scope->NewScope());
+    member_->local_scopes_.emplace_back(member_->global_scope_);
+    for (size_t i = 1; i < member_->places_.size(); ++i) {
+      member_->local_scopes_.emplace_back(&scope->NewScope());
     }
   } else {
     PADDLE_ENFORCE_EQ(member_->places_.size(), local_scopes.size());
     for (size_t i = 0; i < member_->places_.size(); ++i) {
-      member_->local_scopes_.push_back(local_scopes[i]);
+      member_->local_scopes_.emplace_back(local_scopes[i]);
     }
   }
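In the constructor, device 0 now shares the caller's global scope instead of getting its own child scope, and the loop creating fresh child scopes starts at i = 1 (push_back also becomes emplace_back). A simplified sketch of the resulting layout, using stand-in types rather than Paddle's Scope:

```cpp
// Stand-in sketch of the scope-list construction after the fix: device 0
// shares the caller's global scope and the remaining devices each get a
// fresh child scope created under it.
#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

struct Scope {
  Scope *parent = nullptr;
  std::vector<std::unique_ptr<Scope>> kids;
  Scope &NewScope() {
    kids.emplace_back(new Scope);
    kids.back()->parent = this;
    return *kids.back();
  }
};

int main() {
  const std::size_t num_places = 4;
  Scope global_scope;
  std::vector<Scope *> local_scopes;

  local_scopes.emplace_back(&global_scope);  // device 0 reuses the global scope
  for (std::size_t i = 1; i < num_places; ++i) {
    local_scopes.emplace_back(&global_scope.NewScope());  // others get children
  }

  assert(local_scopes.size() == num_places);
  assert(local_scopes[0] == &global_scope);
  assert(local_scopes[1]->parent == &global_scope);
  return 0;
}
```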
@@ -159,7 +160,9 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
                            const std::string &fetched_var_name) {
   platform::RecordBlock b(0);
   // Create local scopes.
-  for (auto &scope : member_->local_scopes_) {
+  for (auto it = member_->local_scopes_.rbegin();
+       it != member_->local_scopes_.rend(); ++it) {
+    auto &scope = *it;
     Scope &local_scope = scope->NewScope();
     *scope->Var(details::kLocalExecScopeName)->GetMutable<Scope *>() =
         &local_scope;
@@ -173,7 +176,7 @@ void ParallelExecutor::Run(const std::vector<std::string> &fetch_tensors,
         InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
                            std::get<1>(name_type_pair));
       } else {
-        InitializeVariable(scope->Var(std::get<0>(name_type_pair)),
+        InitializeVariable(local_scope.Var(std::get<0>(name_type_pair)),
                            std::get<1>(name_type_pair));
       }
     }
...
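In Run(), each entry of local_scopes_ gets a fresh per-run scope whose address is stored under details::kLocalExecScopeName (this is the pointer the op handles above dereference), and the loop now iterates in reverse, presumably so the shared global scope (now local_scopes_[0]) is prepared after the per-device child scopes. The second hunk additionally creates non-persistable variables in the per-run scope rather than the outer scope. A hedged, simplified sketch of that setup (stand-in types and a placeholder scope-key string, not Paddle's real API):

```cpp
// Sketch of the per-run setup the Run() hunks perform: every device scope
// gets a fresh execution scope whose address is published under a fixed key,
// persistable variables stay in the outer (longer-lived) scope, and
// non-persistable variables are created in the per-run execution scope.
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

struct Scope {
  std::unordered_map<std::string, Scope *> scope_vars;  // Scope*-valued vars
  std::unordered_map<std::string, float> data_vars;     // payload vars
  std::vector<std::unique_ptr<Scope>> kids;
  Scope &NewScope() {
    kids.emplace_back(new Scope);
    return *kids.back();
  }
};

static const char kLocalExecScopeName[] = "@local_exec_scope@";  // placeholder key

void PrepareRun(const std::vector<Scope *> &local_scopes,
                const std::vector<std::pair<std::string, bool>> &vars /* name, persistable */) {
  // Reverse order mirrors the fixed code; local_scopes[0] is the global scope.
  for (auto it = local_scopes.rbegin(); it != local_scopes.rend(); ++it) {
    Scope *scope = *it;
    Scope &local_scope = scope->NewScope();
    scope->scope_vars[kLocalExecScopeName] = &local_scope;  // publish the Scope *

    for (const auto &v : vars) {
      if (v.second) {
        scope->data_vars[v.first] = 0.f;       // persistable: outer scope
      } else {
        local_scope.data_vars[v.first] = 0.f;  // non-persistable: per-run scope
      }
    }
  }
}

int main() {
  Scope global;
  std::vector<Scope *> local_scopes = {&global, &global.NewScope()};
  PrepareRun(local_scopes, {{"w", true}, {"tmp", false}});
  return 0;
}
```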