diff --git a/paddle/fluid/platform/cuda_device_guard.h b/paddle/fluid/platform/cuda_device_guard.h
index 08beed532a7ec1bbc9cd866c90c938493a15f5c1..5361828604076f29bde4cc9d789b1e5c78be2d83 100644
--- a/paddle/fluid/platform/cuda_device_guard.h
+++ b/paddle/fluid/platform/cuda_device_guard.h
@@ -30,8 +30,28 @@ class CUDADeviceGuard {
   CUDADeviceGuard() {}
 
+  // Restores the device that was current before this guard switched it.
+  // Nothing happens when prev_id_ is -1 (presumably "no switch was recorded";
+  // confirm against the constructor, which is outside this hunk).
   ~CUDADeviceGuard() {
+    // Per-thread flag: true until the first guard holding a recorded
+    // previous device has been destroyed on this thread.
+    static thread_local bool is_first_time_ = true;
     if (prev_id_ != -1) {
-      platform::SetDeviceId(prev_id_);
+      // Do not set the device back the first time: `cudaGetDevice` returns
+      // 0 when `cudaSetDevice` has never been called, so after a first
+      // CUDADeviceGuard(7) prev_id_ is 0 only by default and there is
+      // nothing to restore. Calling cudaSetDevice(0) anyway may consume
+      // hundreds of MB of GPU memory on device 0.
+      VLOG(10) << __func__ << " prev_id: " << prev_id_
+               << ", is_first_time_: " << is_first_time_;
+      const bool skip_restore = is_first_time_ && prev_id_ == 0;
+      // Clear the flag on the skip path as well. Otherwise it stays true
+      // and a later guard whose previous device really was 0 (e.g. one
+      // nested inside CUDADeviceGuard(0)) would never switch back.
+      is_first_time_ = false;
+      if (!skip_restore) {
+        platform::SetDeviceId(prev_id_);
+      }
     }
   }
 