You need to sign in or sign up before continuing.
提交 f71543ee 编写于 作者: S sandyhouse

Merge branch 'add_timeline' into pipeline_exe_run

...@@ -455,6 +455,7 @@ class SectionWorker : public DeviceWorker { ...@@ -455,6 +455,7 @@ class SectionWorker : public DeviceWorker {
std::vector<std::unique_ptr<OperatorBase>> ops_; std::vector<std::unique_ptr<OperatorBase>> ops_;
static std::mutex thread_mutex; static std::mutex thread_mutex;
static std::mutex cout_mutex;
static std::condition_variable thread_condition; static std::condition_variable thread_condition;
static bool threads_completed; static bool threads_completed;
std::shared_ptr<framework::ProgramDesc> program_; std::shared_ptr<framework::ProgramDesc> program_;
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#if defined(PADDLE_WITH_NCCL) #if defined(PADDLE_WITH_NCCL)
#include <map>
#include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/data_feed_factory.h"
#include "paddle/fluid/framework/device_worker_factory.h" #include "paddle/fluid/framework/device_worker_factory.h"
#include "paddle/fluid/framework/trainer.h" #include "paddle/fluid/framework/trainer.h"
...@@ -44,7 +45,6 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc, ...@@ -44,7 +45,6 @@ void PipelineTrainer::Initialize(const TrainerDesc& trainer_desc,
"must be 1 now, but the value you give is %d.", "must be 1 now, but the value you give is %d.",
num_readers)); num_readers));
auto* reader = readers[0]; auto* reader = readers[0];
feed_var_names_ = reader->GetUseSlotAlias();
workers_.resize(section_num_); workers_.resize(section_num_);
for (int i = 0; i < section_num_; ++i) { for (int i = 0; i < section_num_; ++i) {
...@@ -123,26 +123,36 @@ void PipelineTrainer::CopyParameters(int section_id, int microbatch_id, ...@@ -123,26 +123,36 @@ void PipelineTrainer::CopyParameters(int section_id, int microbatch_id,
const ProgramDesc& program, const ProgramDesc& program,
const platform::Place& place) { const platform::Place& place) {
auto& global_block = program.Block(0); auto& global_block = program.Block(0);
std::map<std::string, int> param_map;
for (auto& var : global_block.AllVars()) { for (auto& var : global_block.AllVars()) {
int is_feed_var = if (var->Persistable()) {
std::count(feed_var_names_.begin(), feed_var_names_.end(), var->Name()); param_map[var->Name()] = 1;
if ((var->Persistable() || is_feed_var) && microbatch_id == 0) { }
if (is_feed_var) { }
auto* new_ptr = minibatch_scopes_[section_id]->Var(var->Name()); for (auto& var : global_block.AllVars()) {
VLOG(3) << "data name: " << var->Name() << ", ptr: " << new_ptr; bool is_param_grad = false;
InitializeVariable(new_ptr, var->GetType()); size_t pos = 0;
} else { if ((pos = var->Name().find(kGradVarSuffix)) != std::string::npos) {
auto* ptr = root_scope_->FindVar(var->Name()); auto prefix_name = var->Name().substr(0, pos);
auto* new_ptr = minibatch_scopes_[section_id]->Var(var->Name()); if (param_map.find(prefix_name) != param_map.end()) {
VLOG(3) << "Create persistable var " << var->Name() << " for minibatch " is_param_grad = true;
<< section_id << ", which pointer is " << new_ptr; }
InitializeVariable(new_ptr, var->GetType()); }
const LoDTensor& root_tensor = ptr->Get<LoDTensor>(); VLOG(3) << "Var name: " << var->Name();
LoDTensor* minibatch_tensor = new_ptr->GetMutable<LoDTensor>(); if ((var->Persistable() || is_param_grad) && microbatch_id == 0) {
TensorCopy(*static_cast<const Tensor*>(&root_tensor), place, auto* ptr = root_scope_->FindVar(var->Name());
static_cast<Tensor*>(minibatch_tensor)); auto* new_ptr = minibatch_scopes_[section_id]->Var(var->Name());
VLOG(3) << "Create persistable var " << var->Name() << " for minibatch "
<< section_id << ", which pointer is " << new_ptr;
InitializeVariable(new_ptr, var->GetType());
if (is_param_grad) {
continue;
} }
} else if (!var->Persistable() && !is_feed_var) { const LoDTensor& root_tensor = ptr->Get<LoDTensor>();
LoDTensor* minibatch_tensor = new_ptr->GetMutable<LoDTensor>();
TensorCopy(*static_cast<const Tensor*>(&root_tensor), place,
static_cast<Tensor*>(minibatch_tensor));
} else if (!var->Persistable() && !is_param_grad) {
auto* ptr = auto* ptr =
microbatch_scopes_[section_id][microbatch_id]->Var(var->Name()); microbatch_scopes_[section_id][microbatch_id]->Var(var->Name());
VLOG(3) << "Create variable " << var->Name() << " for section " VLOG(3) << "Create variable " << var->Name() << " for section "
...@@ -244,7 +254,7 @@ void PipelineTrainer::Finalize() { ...@@ -244,7 +254,7 @@ void PipelineTrainer::Finalize() {
const LoDTensor& minibatch_tensor = minibatch_ptr->Get<LoDTensor>(); const LoDTensor& minibatch_tensor = minibatch_ptr->Get<LoDTensor>();
TensorCopy(*static_cast<const Tensor*>(&minibatch_tensor), places_[0], TensorCopy(*static_cast<const Tensor*>(&minibatch_tensor), places_[0],
static_cast<Tensor*>(root_tensor)); static_cast<Tensor*>(root_tensor));
VLOG(4) << "Copy persitable var " << var->Name() << " to root scope"; VLOG(3) << "Copy persitable var " << var->Name() << " to root scope";
} }
} }
} }
......
...@@ -223,7 +223,6 @@ class PipelineTrainer : public TrainerBase { ...@@ -223,7 +223,6 @@ class PipelineTrainer : public TrainerBase {
int section_num_; int section_num_;
int num_microbatches_; int num_microbatches_;
int start_cpu_core_id_; int start_cpu_core_id_;
std::vector<std::string> feed_var_names_;
std::vector<platform::Place> places_; std::vector<platform::Place> places_;
std::vector<std::vector<std::string>> skip_vars_; std::vector<std::vector<std::string>> skip_vars_;
TrainerDesc trainer_desc_; TrainerDesc trainer_desc_;
......
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册