From 24db3a70d682e3fa8022e240bc9cc96c30d1ff22 Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Sat, 13 Apr 2019 20:25:30 +0800 Subject: [PATCH] fix the hang bugs of memory copying. test=develop --- paddle/fluid/operators/fake_quantize_op.cu | 25 ++++++++++++---------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 33bd275e5..7d5511067 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -235,11 +235,13 @@ struct FindRangeAbsMaxFunctor { int g_find_max; memory::Copy(platform::CPUPlace(), &g_find_max, gpu_place, find_max, - sizeof(int), 0); + sizeof(int), ctx.stream()); + ctx.Wait(); if (g_find_max) { int len; memory::Copy(platform::CPUPlace(), &len, gpu_place, out_size_data, - sizeof(int), 0); + sizeof(int), ctx.stream()); + ctx.Wait(); FindAbsMaxFunctor()(ctx, scale_arr, len, out_scale_data); } @@ -258,25 +260,26 @@ struct FindMovingAverageAbsMaxFunctor { const auto gpu_place = boost::get(ctx.GetPlace()); T accum; - memory::Copy(platform::CPUPlace(), &accum, gpu_place, in_accum.data(), - sizeof(T), 0); T state; - memory::Copy(platform::CPUPlace(), &state, gpu_place, in_state.data(), - sizeof(T), 0); T scale; + memory::Copy(platform::CPUPlace(), &accum, gpu_place, in_accum.data(), + sizeof(T), ctx.stream()); + memory::Copy(platform::CPUPlace(), &state, gpu_place, in_state.data(), + sizeof(T), ctx.stream()); memory::Copy(platform::CPUPlace(), &scale, gpu_place, cur_scale, sizeof(T), - 0); - + ctx.stream()); + ctx.Wait(); state = rate * state + 1; accum = rate * accum + scale; scale = accum / state; memory::Copy(gpu_place, out_accum->mutable_data(gpu_place), - platform::CPUPlace(), &accum, sizeof(T), 0); + platform::CPUPlace(), &accum, sizeof(T), ctx.stream()); memory::Copy(gpu_place, out_state->mutable_data(gpu_place), - platform::CPUPlace(), &state, sizeof(T), 0); + platform::CPUPlace(), &state, sizeof(T), ctx.stream()); memory::Copy(gpu_place, out_scale->mutable_data(gpu_place), - platform::CPUPlace(), &scale, sizeof(T), 0); + platform::CPUPlace(), &scale, sizeof(T), ctx.stream()); + ctx.Wait(); } }; -- GitLab