From 81217a94d87362c07c50e837c538e3ae2eb28137 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Tue, 19 Jan 2021 15:27:39 +0800 Subject: [PATCH] unify calling cudaSetDevice (#30470) * unify calling cudaSetDevice * fix compile --- paddle/fluid/framework/details/nccl_op_handle.h | 2 +- paddle/fluid/framework/details/op_handle_base.cc | 2 +- paddle/fluid/framework/fleet/nccl_wrapper.cc | 2 +- paddle/fluid/inference/tensorrt/engine.cc | 2 +- paddle/fluid/memory/malloc_test.cu | 4 ++-- paddle/fluid/platform/collective_helper.cc | 2 +- paddle/fluid/platform/gpu_info.cc | 2 +- paddle/fluid/platform/nccl_helper.h | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/paddle/fluid/framework/details/nccl_op_handle.h b/paddle/fluid/framework/details/nccl_op_handle.h index 22a059773f5..eb536560b62 100644 --- a/paddle/fluid/framework/details/nccl_op_handle.h +++ b/paddle/fluid/framework/details/nccl_op_handle.h @@ -94,7 +94,7 @@ class NCCLOpHandleBase : public OpHandleBase { continue; } - PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); + platform::SetDeviceId(dev_id); PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags( &inter_events_[dev_id], cudaEventDisableTiming)); PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventCreateWithFlags( diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index eeff0f3d46d..240be51a442 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -47,7 +47,7 @@ void OpHandleBase::InitCUDA() { #ifdef PADDLE_WITH_CUDA for (auto &p : dev_ctxes_) { int dev_id = BOOST_GET_CONST(platform::CUDAPlace, p.first).device; - PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); + platform::SetDeviceId(dev_id); PADDLE_ENFORCE_CUDA_SUCCESS( cudaEventCreateWithFlags(&events_[dev_id], cudaEventDisableTiming)); } diff --git a/paddle/fluid/framework/fleet/nccl_wrapper.cc b/paddle/fluid/framework/fleet/nccl_wrapper.cc index ed92e2e9aad..8ba94f4fd7a 100644 --- a/paddle/fluid/framework/fleet/nccl_wrapper.cc +++ b/paddle/fluid/framework/fleet/nccl_wrapper.cc @@ -50,7 +50,7 @@ void NCCLWrapper::SetRankInfo(const int local_rank, const int global_rank, nccl_info_.local_rank_ = local_rank; nccl_info_.my_global_rank_ = global_rank; nccl_info_.global_ranks_ = ranks; - PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(local_rank)); + platform::SetDeviceId(local_rank); PADDLE_ENFORCE_CUDA_SUCCESS(cudaStreamCreate(&(nccl_info_.stream_))); #endif return; diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 1f7ea7ea044..90b3e2c0e97 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -339,7 +339,7 @@ void TensorRTEngine::freshDeviceId() { platform::errors::OutOfRange( "Device id %d exceeds the current device count: %d.", device_id_, count)); - cudaSetDevice(device_id_); + platform::SetDeviceId(device_id_); } } // namespace tensorrt diff --git a/paddle/fluid/memory/malloc_test.cu b/paddle/fluid/memory/malloc_test.cu index 89853e159bd..c9fbaf351ea 100644 --- a/paddle/fluid/memory/malloc_test.cu +++ b/paddle/fluid/memory/malloc_test.cu @@ -64,7 +64,7 @@ void MultiStreamCompute(float **data, float **second_data, TEST(Malloc, CUDADeviceContextMultiStream) { auto place = platform::CUDAPlace(0); - EXPECT_TRUE(cudaSuccess == cudaSetDevice(0)); + platform::SetDeviceId(0); AllocationPtr main_stream_alloc_ptr = Alloc(place, N * sizeof(float)); EXPECT_GE(main_stream_alloc_ptr->size(), N * sizeof(float)); @@ -94,7 +94,7 @@ TEST(Malloc, CUDADeviceContextMultiStream) { TEST(Malloc, CUDADeviceContextMultiThreadMultiStream) { auto place = platform::CUDAPlace(0); - EXPECT_TRUE(cudaSuccess == cudaSetDevice(0)); + platform::SetDeviceId(0); AllocationPtr main_stream_alloc_ptr = Alloc(place, N * sizeof(float)); EXPECT_GE(main_stream_alloc_ptr->size(), N * sizeof(float)); diff --git a/paddle/fluid/platform/collective_helper.cc b/paddle/fluid/platform/collective_helper.cc index d2d9b41fcce..08d70404a24 100644 --- a/paddle/fluid/platform/collective_helper.cc +++ b/paddle/fluid/platform/collective_helper.cc @@ -75,7 +75,7 @@ NCCLComm* NCCLCommContext::CreateNCCLComm(ncclUniqueId* nccl_id, int nranks, "Expected dev_id >= 0. But received dev_id is %d.", dev_id)); ncclComm_t comm = nullptr; - PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(dev_id)); + SetDeviceId(dev_id); PADDLE_ENFORCE_CUDA_SUCCESS( platform::dynload::ncclCommInitRank(&comm, nranks, *nccl_id, rank)); diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc index 2a6714c39a1..f4c58920b8e 100644 --- a/paddle/fluid/platform/gpu_info.cc +++ b/paddle/fluid/platform/gpu_info.cc @@ -226,7 +226,7 @@ void SetDeviceId(int id) { "Device id must be less than GPU count, " "but received id is: %d. GPU count is: %d.", id, GetCUDADeviceCount())); - PADDLE_ENFORCE_CUDA_SUCCESS(cudaSetDevice(id)); + PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(id)); } void GpuMemoryUsage(size_t *available, size_t *total) { diff --git a/paddle/fluid/platform/nccl_helper.h b/paddle/fluid/platform/nccl_helper.h index c2f4d6ff2ff..e6c5f06c4c4 100644 --- a/paddle/fluid/platform/nccl_helper.h +++ b/paddle/fluid/platform/nccl_helper.h @@ -132,7 +132,7 @@ struct NCCLContextMap { } VLOG(1) << "init nccl rank:" << rank << ", nranks:" << nranks << ", gpu_id:" << gpu_id << ", dev_id:" << order_[i]; - PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(gpu_id)); + SetDeviceId(gpu_id); PADDLE_RETRY_CUDA_SUCCESS(platform::dynload::ncclCommInitRank( comms.get() + i, nranks, *nccl_id, rank)); } -- GitLab