Unverified commit a647b80a, authored by Thunderbrook, committed by GitHub

[HeterPs] merge dense && data norm && g2sum (#35029)

* merge dense

* log level

* tensor copy sync

* format
Parent 264ff9ef
...@@ -44,7 +44,7 @@ class Optimizer { ...@@ -44,7 +44,7 @@ class Optimizer {
if (w < optimizer_config::min_bound) w = optimizer_config::min_bound; if (w < optimizer_config::min_bound) w = optimizer_config::min_bound;
if (w > optimizer_config::max_bound) w = optimizer_config::max_bound; if (w > optimizer_config::max_bound) w = optimizer_config::max_bound;
add_g2sum = scaled_grad * scaled_grad; add_g2sum += scaled_grad * scaled_grad;
g2sum += add_g2sum; g2sum += add_g2sum;
} }
...@@ -64,7 +64,7 @@ class Optimizer { ...@@ -64,7 +64,7 @@ class Optimizer {
w[i] = optimizer_config::mf_min_bound; w[i] = optimizer_config::mf_min_bound;
if (w[i] > optimizer_config::mf_max_bound) if (w[i] > optimizer_config::mf_max_bound)
w[i] = optimizer_config::mf_max_bound; w[i] = optimizer_config::mf_max_bound;
add_g2sum = scaled_grad * scaled_grad; add_g2sum += scaled_grad * scaled_grad;
} }
g2sum += add_g2sum / n; g2sum += add_g2sum / n;
......
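The `+=` change matters because, in the mf branch (second hunk), `add_g2sum` is accumulated inside the loop over the embedding dimensions and then averaged over the width `n`; with plain `=`, only the last dimension's squared gradient survived. A minimal sketch in plain Python of the corrected arithmetic (the helper name and signature are hypothetical, not Paddle API):

```python
def update_g2sum(g2sum, scaled_grads):
    """Accumulate squared scaled gradients over all dimensions, then average."""
    add_g2sum = 0.0
    for g in scaled_grads:
        add_g2sum += g * g  # `+=` after the fix; `=` kept only the last term
    return g2sum + add_g2sum / len(scaled_grads)

print(update_g2sum(0.0, [0.1, 0.2, 0.3]))  # 0.0466...
```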
...@@ -57,8 +57,6 @@ void PSGPUTrainer::Initialize(const TrainerDesc& trainer_desc, ...@@ -57,8 +57,6 @@ void PSGPUTrainer::Initialize(const TrainerDesc& trainer_desc,
trainer_desc.downpour_param().stat_var_names(i)); trainer_desc.downpour_param().stat_var_names(i));
} }
VLOG(3) << "going to initialize pull dense worker"; VLOG(3) << "going to initialize pull dense worker";
pull_dense_worker_ = PullDenseWorker::GetInstance();
pull_dense_worker_->Initialize(trainer_desc);
SetDebug(trainer_desc.debug()); SetDebug(trainer_desc.debug());
trainer_desc_ = trainer_desc; trainer_desc_ = trainer_desc;
workers_.resize(place_num); workers_.resize(place_num);
@@ -112,15 +110,21 @@ void PSGPUTrainer::InitTrainerEnv(const ProgramDesc& main_program,
       }
     }
   }
+  for (auto& var : main_program.Block(0).AllVars()) {
+    if (var->Persistable()) {
+      auto it = std::find(need_merge_var_names_.begin(),
+                          need_merge_var_names_.end(), var->Name());
+      if (it == need_merge_var_names_.end()) {
+        VLOG(2) << "train param: " << var->Name();
+        trainable_param_.push_back(var->Name());
+      }
+    }
+  }
   place_ = place;
   return;
 }

 void PSGPUTrainer::InitOtherEnv(const ProgramDesc& main_program) {
-  pull_dense_worker_->SetRootScope(root_scope_);
-  for (size_t i = 0; i < places_.size(); ++i) {
-    pull_dense_worker_->AddThreadScope(workers_[i]->GetThreadScope());
-  }
   VLOG(3) << "init other env done.";
 }
@@ -141,15 +145,27 @@ Scope* PSGPUTrainer::GetWorkerScope(int thread_id) { return nullptr; }
 template <typename T>
 void PSGPUTrainer::MergeToRootScope(LoDTensor* root_tensor, LoDTensor* tensor) {
   LoDTensor tmp_root;
-  TensorCopy(*root_tensor, platform::CPUPlace(), &tmp_root);
+  TensorCopySync(*root_tensor, platform::CPUPlace(), &tmp_root);
   T* tmp_root_data = tmp_root.data<T>();
   LoDTensor tmp_tensor;
-  TensorCopy(*tensor, platform::CPUPlace(), &tmp_tensor);
+  TensorCopySync(*tensor, platform::CPUPlace(), &tmp_tensor);
   T* data = tmp_tensor.data<T>();
   for (int i = 0; i < tmp_tensor.numel(); i++) {
     tmp_root_data[i] += data[i];
   }
-  TensorCopy(tmp_root, platform::CPUPlace(), root_tensor);
+  TensorCopySync(tmp_root, platform::CPUPlace(), root_tensor);
+}
+
+void PSGPUTrainer::MergeDenseParam() {
+  auto thread_scope = workers_[0]->GetThreadScope();
+  for (auto& name : trainable_param_) {
+    VLOG(2) << "merge var " << name << " to root scope";
+    Variable* root_var = root_scope_->FindVar(name);
+    LoDTensor* root_tensor = root_var->GetMutable<LoDTensor>();
+    Variable* var = thread_scope->FindVar(name);
+    LoDTensor* tensor = var->GetMutable<LoDTensor>();
+    TensorCopySync((*tensor), root_tensor->place(), root_tensor);
+  }
 }

 void PSGPUTrainer::Finalize() {
@@ -187,7 +203,7 @@ void PSGPUTrainer::Finalize() {
       _ForEachDataType_(MergeCallback);
     }
   }
-  pull_dense_worker_->MergeDenseParam();
+  MergeDenseParam();
   root_scope_->DropKids();
 }
 }  // namespace framework
......
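The trainer changes replace the PullDenseWorker path: `InitTrainerEnv` now records every persistable variable that is not in `need_merge_var_names_` into `trainable_param_`, and the new `PSGPUTrainer::MergeDenseParam` copies those dense parameters from the first worker's thread scope back into the root scope. Both paths use `TensorCopySync`, so each copy has finished before the host-side summation in `MergeToRootScope` reads the buffers. A simplified, self-contained model in Python, where plain dicts stand in for `Scope` and the variable names are illustrative, not Paddle API:

```python
def merge_dense_param(root_scope, worker0_scope, trainable_param):
    """Model of MergeDenseParam: copy each dense parameter from the first
    worker's thread scope into the root scope (stands in for TensorCopySync)."""
    for name in trainable_param:
        root_scope[name] = list(worker0_scope[name])

root = {"fc_0.w_0": [0.0, 0.0]}       # root scope before merging
worker0 = {"fc_0.w_0": [0.5, -1.25]}  # first worker's trained values
merge_dense_param(root, worker0, ["fc_0.w_0"])
print(root["fc_0.w_0"])               # [0.5, -1.25]
```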
@@ -265,6 +265,7 @@ class PSGPUTrainer : public TrainerBase {
   }
   virtual std::string GetDumpPath(int tid) { return ""; }
   virtual void InitDumpEnv() {}
+  virtual void MergeDenseParam();
   template <typename T>
   void MergeToRootScope(LoDTensor* root_tensor, LoDTensor* thread_tensor);
@@ -274,6 +275,7 @@ class PSGPUTrainer : public TrainerBase {
   DownpourWorkerParameter param_;
   std::map<uint64_t, std::vector<std::string>> dense_grad_names_;
   std::vector<std::string> need_merge_var_names_;
+  std::vector<std::string> trainable_param_;
   float scale_datanorm_;
   paddle::platform::Place place_;
   ProgramDesc program_;
......
@@ -412,11 +412,13 @@ class DistributedAdam(DistributedOptimizerImplBase):
         sparse_table_index = 0
         for num in range(len(losses)):
             loss = losses[num]
+            parameters = None
+            if parameter_list != None:
+                parameters = parameter_list[num]
             prog_id = str(id(loss.block.program))
             # param_grads of program
             params_grads = sorted(
-                fluid.backward.append_backward(loss, parameter_list,
-                                               no_grad_set),
+                fluid.backward.append_backward(loss, parameters, no_grad_set),
                 key=lambda x: x[0].name)
             flag_use_ps_gpu = strategy.get("use_ps_gpu", False)
......
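The Python change indexes `parameter_list` per loss. Read together with the surrounding loop, it implies that when several losses are minimized, `parameter_list` is expected to hold one sub-list of parameters per loss, and each `append_backward` call now receives only the sub-list belonging to its loss. A small sketch of that behavior with placeholder values and no Paddle imports:

```python
losses = ["loss_a", "loss_b"]                      # placeholders for loss Variables
parameter_list = [["w_a", "b_a"], ["w_b", "b_b"]]  # one sub-list per loss
no_grad_set = None

for num in range(len(losses)):
    loss = losses[num]
    parameters = None
    if parameter_list is not None:
        parameters = parameter_list[num]
    # fluid.backward.append_backward(loss, parameters, no_grad_set) would be
    # called here with only this loss's parameters
    print(loss, "->", parameters)
```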