提交 60d0a059 编写于 作者: C chengduoZH

refine parallel

上级 5a8b05f0
...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and ...@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/framework/parallel_executor.h"
#include <string>
#include "ThreadPool.h" #include "ThreadPool.h"
...@@ -102,15 +103,18 @@ void ParallelExecutor::BCastParamsToGPUs( ...@@ -102,15 +103,18 @@ void ParallelExecutor::BCastParamsToGPUs(
auto *main_scope = member_->local_scopes_[0]; auto *main_scope = member_->local_scopes_[0];
for (auto *var_desc : startup_program.Block(0).AllVars()) { for (auto *var_desc : startup_program.Block(0).AllVars()) {
size_t idx = var_desc->Name().find("@GRAD");
if (idx != std::string::npos) continue;
if (var_desc->GetType() == proto::VarType::LOD_TENSOR) { if (var_desc->GetType() == proto::VarType::LOD_TENSOR) {
auto &main_tensor = auto &main_tensor =
main_scope->FindVar(var_desc->Name())->Get<LoDTensor>(); main_scope->FindVar(var_desc->Name())->Get<LoDTensor>();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
size_t numel = main_tensor.numel();
if (paddle::platform::is_gpu_place(main_tensor.place())) {
size_t numel = main_tensor.numel();
ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
platform::NCCLGroupGuard guard; platform::NCCLGroupGuard guard;
for (size_t i = 0; i < member_->places_.size(); ++i) { for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i]; auto place = member_->places_[i];
void *buffer; void *buffer;
...@@ -118,15 +122,25 @@ void ParallelExecutor::BCastParamsToGPUs( ...@@ -118,15 +122,25 @@ void ParallelExecutor::BCastParamsToGPUs(
buffer = const_cast<void *>(main_tensor.data<void>()); buffer = const_cast<void *>(main_tensor.data<void>());
} else { } else {
auto local_scope = member_->local_scopes_[i]; auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var_desc->Name())->GetMutable<LoDTensor>(); auto *t =
local_scope->Var(var_desc->Name())->GetMutable<LoDTensor>();
t->Resize(dims); t->Resize(dims);
buffer = t->mutable_data(place, main_tensor.type()); buffer = t->mutable_data(place, main_tensor.type());
} }
auto &nccl_ctx = member_->nccl_ctxs_->at(place); auto &nccl_ctx = member_->nccl_ctxs_->at(place);
platform::dynload::ncclBcast(buffer, numel, data_type, 0, platform::dynload::ncclBcast(buffer, numel, data_type, 0,
nccl_ctx.comm_, nccl_ctx.stream()); nccl_ctx.comm_, nccl_ctx.stream());
} }
} else {
platform::CPUPlace cpu;
for (size_t i = 1; i < member_->places_.size(); ++i) {
auto local_scope = member_->local_scopes_[i];
auto *t = local_scope->Var(var_desc->Name())->GetMutable<LoDTensor>();
t->Resize(dims);
t->mutable_data(cpu, main_tensor.type());
paddle::framework::TensorCopy(main_tensor, cpu, t);
}
}
} }
member_->nccl_ctxs_->WaitAll(); member_->nccl_ctxs_->WaitAll();
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册