diff --git a/paddle/framework/mixed_vector.h b/paddle/framework/mixed_vector.h index d388da4f2c79de9e60c972824254aeb014f8aecf..f776f0317a2bed69cf8795c2a12a467c50ba38d3 100644 --- a/paddle/framework/mixed_vector.h +++ b/paddle/framework/mixed_vector.h @@ -277,11 +277,15 @@ class Vector { kDirty = 0x10 }; + void CopyToCPU() const { + // COPY GPU Data To CPU + Copy(cuda_vec_, platform::CPUPlace(), &cpu_vec_); + WaitPlace(cuda_vec_.place()); + } + void MutableCPU() { if (IsInCUDA() && IsDirty()) { - // COPY GPU Data To CPU - Copy(cuda_vec_, platform::CPUPlace(), &cpu_vec_); - WaitPlace(cuda_vec_.place()); + CopyToCPU(); } flag_ = kDirty | kDataInCPU; } @@ -311,8 +315,10 @@ class Vector { SetFlag(kDataInCUDA); } else if (!(place == cuda_vec_.place())) { framework::Tensor tmp; + WaitPlace(cuda_vec_.place()); Copy(cuda_vec_, boost::get(place), &tmp); WaitPlace(cuda_vec_.place()); + WaitPlace(place); cuda_vec_.ShareDataWith(tmp); } else { // Not Dirty && DataInCUDA && Device is same @@ -324,8 +330,7 @@ class Vector { void ImmutableCPU() const { if (IsDirty() && !IsInCPU()) { // If data has been changed in CUDA, or CPU has no data. - Copy(cuda_vec_, platform::CPUPlace(), &cpu_vec_); - WaitPlace(cuda_vec_.place()); + CopyToCPU(); UnsetFlag(kDirty); } SetFlag(kDataInCPU); diff --git a/paddle/framework/mixed_vector_test.cu b/paddle/framework/mixed_vector_test.cu index a037cc3b9900c4a9ad88d530e19b525a8bd2cb53..f02db8f612c498ddd103a6b96e80d265e13c0f52 100644 --- a/paddle/framework/mixed_vector_test.cu +++ b/paddle/framework/mixed_vector_test.cu @@ -81,10 +81,12 @@ TEST(mixed_vector, MultiGPU) { } ASSERT_EQ(tmp.size(), 10); paddle::platform::CUDAPlace gpu0(0); + paddle::platform::SetDeviceId(0); multiply_10<<<1, 1, 0, GetCUDAStream(gpu0)>>>(tmp.MutableData(gpu0)); paddle::platform::CUDAPlace gpu1(1); - multiply_10<<<1, 1, 0, GetCUDAStream(gpu1)>>>(tmp.MutableData(gpu1)); - + auto* gpu1_ptr = tmp.MutableData(gpu1); + paddle::platform::SetDeviceId(1); + multiply_10<<<1, 1, 0, GetCUDAStream(gpu1)>>>(gpu1_ptr); for (int i = 0; i < 10; ++i) { ASSERT_EQ(tmp[i], i * 100); } diff --git a/paddle/operators/math/selected_rows_functor.cu b/paddle/operators/math/selected_rows_functor.cu index 5c3a53ae1ba92dbd11f3158789f53bd205747149..54a41a67d063fdc9e4453cb339d92d4a406cecc2 100644 --- a/paddle/operators/math/selected_rows_functor.cu +++ b/paddle/operators/math/selected_rows_functor.cu @@ -154,7 +154,9 @@ struct SelectedRowsAddTo { auto* in2_value = input2->mutable_value(); // concat rows - in2_rows.Extend(in1_rows.begin(), in1_rows.end()); + if (in1_rows.size()) { + in2_rows.Extend(in1_rows.begin(), in1_rows.end()); + } auto in1_place = input1.place(); PADDLE_ENFORCE(platform::is_gpu_place(in1_place));