提交 494c262a 编写于 作者: W whs 提交者: Yi Wang

Fix average_accumulate_op for parallel executor. (#9852)

上级 630943c7
@@ -25,12 +25,14 @@ void GetAccumulators<paddle::platform::CUDADeviceContext>(
  auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates");
  auto* in_num_updates = ctx.Input<Tensor>("in_num_updates");
  auto stream = ctx.cuda_device_context().stream();
- memory::Copy(platform::CPUPlace(), old_num_accumulates_,
- platform::CUDAPlace(), in_old_num_accumulates->data<int64_t>(),
- sizeof(int64_t), stream);
- memory::Copy(platform::CPUPlace(), num_accumulates_, platform::CUDAPlace(),
+ auto cuda_place =
+ boost::get<platform::CUDAPlace>(in_old_num_accumulates->place());
+ memory::Copy(platform::CPUPlace(), old_num_accumulates_, cuda_place,
+ in_old_num_accumulates->data<int64_t>(), sizeof(int64_t),
+ stream);
+ memory::Copy(platform::CPUPlace(), num_accumulates_, cuda_place,
  in_num_accumulates->data<int64_t>(), sizeof(int64_t), stream);
- memory::Copy(platform::CPUPlace(), num_updates_, platform::CUDAPlace(),
+ memory::Copy(platform::CPUPlace(), num_updates_, cuda_place,
  in_num_updates->data<int64_t>(), sizeof(int64_t), stream);
  }
@@ -42,14 +44,16 @@ void SetAccumulators<paddle::platform::CUDADeviceContext>(
  auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates");
  auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates");
  auto* out_num_updates = ctx.Output<Tensor>("out_num_updates");
+ auto cuda_place =
+ boost::get<platform::CUDAPlace>(out_old_num_accumulates->place());
- memory::Copy(platform::CUDAPlace(), out_old_num_accumulates->data<int64_t>(),
+ memory::Copy(cuda_place, out_old_num_accumulates->data<int64_t>(),
  platform::CPUPlace(), &old_num_accumulates_, sizeof(int64_t),
  stream);
- memory::Copy(platform::CUDAPlace(), out_num_accumulates->data<int64_t>(),
+ memory::Copy(cuda_place, out_num_accumulates->data<int64_t>(),
  platform::CPUPlace(), &num_accumulates_, sizeof(int64_t),
  stream);
- memory::Copy(platform::CUDAPlace(), out_num_updates->data<int64_t>(),
+ memory::Copy(cuda_place, out_num_updates->data<int64_t>(),
  platform::CPUPlace(), &num_updates_, sizeof(int64_t), stream);
  }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册