Commit d24e046c authored by chengduoZH

fix allReduce bug

Parent: a57e8a43
```diff
@@ -107,7 +107,7 @@ void AllReduceOpHandle::RunImpl() {
   auto &trg = *this->local_scopes_[0]
                    ->FindVar(kLocalExecScopeName)
                    ->Get<Scope *>()
-                   ->Var()
+                   ->FindVar(in_var_handles[0]->name_)
                    ->GetMutable<framework::LoDTensor>();
   // Reduce All Tensor to trg in CPU
```
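Judging from the diff, the C++ bug was that the reduction target `trg` was obtained via `Var()` with no name, which creates a fresh variable, so the all-reduced result never landed in the tensor of the first input; the fix looks the existing variable up by name with `FindVar(in_var_handles[0]->name_)`. Below is a minimal Python sketch of that distinction (all names hypothetical, the scope semantics an assumption based on the diff, not Paddle's actual API):

```python
class Scope:
    """Toy stand-in for a variable scope (illustrative only)."""

    def __init__(self):
        self.vars = {}
        self._anon = 0

    def var(self, name=None):
        # No name given: mint a unique one, creating a brand-new
        # variable (assumed behavior, mirroring the buggy Var() call).
        if name is None:
            name = "@anon_%d" % self._anon
            self._anon += 1
        return self.vars.setdefault(name, {})

    def find_var(self, name):
        # Only returns a variable that already exists under `name`.
        return self.vars.get(name)


scope = Scope()
scope.var("in_var")["tensor"] = [1.0, 2.0, 3.0]  # input produced earlier

trg = scope.var()                           # old code: new anonymous variable
assert trg is not scope.find_var("in_var")  # result lands in the wrong place

trg = scope.find_var("in_var")              # fixed code: reuse the named input
assert trg["tensor"] == [1.0, 2.0, 3.0]
```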
```diff
@@ -121,8 +121,9 @@ class ParallelExecutor(object):
             # performance. Worth tunning for other models in the future.
             exec_strategy.num_threads = len(self._places) * 4
         else:
-            # Currently num_threads must be 1.
-            exec_strategy.num_threads = 1
+            cpu_num = int(
+                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
+            exec_strategy.num_threads = cpu_num
         if build_strategy is None:
             build_strategy = BuildStrategy()
```
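On the Python side, the hard-coded `num_threads = 1` in the CPU branch is replaced with a CPU-count default that can be overridden through the `CPU_NUM` environment variable. A standalone sketch of the new default (the helper function name is mine, not part of the commit):

```python
import multiprocessing
import os


def default_num_threads():
    """New CPU default from this commit: honor the CPU_NUM environment
    variable if set, otherwise fall back to the logical core count."""
    return int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))


# Override from the shell, e.g.:  CPU_NUM=4 python train.py
print(default_num_threads())
```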