diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index ecbce3b6102f47100736dbb16e3c30f297c601d4..66e8f93736a5b1faf1a217af7e84cb93615d0cd9 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -28,6 +28,7 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/platform/device/gpu/gpu_info.h" PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace, true, @@ -98,6 +99,11 @@ InterpreterCore::~InterpreterCore() { interpreter::CostInfo InterpreterCore::DryRun( const std::vector<std::string>& feed_names, const std::vector<phi::DenseTensor>& feed_tensors) { +#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS) + if (platform::is_gpu_place(place_)) { + platform::SetDeviceId(place_.device); + } +#endif Prepare(feed_names, feed_tensors, true); interpreter::CostInfo cost_info; { @@ -122,6 +128,11 @@ interpreter::CostInfo InterpreterCore::DryRun( paddle::framework::FetchList InterpreterCore::Run( const std::vector<std::string>& feed_names, const std::vector<phi::DenseTensor>& feed_tensors) { +#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS) + if (platform::is_gpu_place(place_)) { + platform::SetDeviceId(place_.device); + } +#endif #ifdef PADDLE_WITH_MKLDNN platform::AttachPointerHashToMKLDNNKey(this, place_); #endif @@ -153,6 +164,11 @@ paddle::framework::FetchList InterpreterCore::Run( paddle::framework::FetchList InterpreterCore::Run( const std::vector<std::string>& feed_names) { +#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS) + if (platform::is_gpu_place(place_)) { + platform::SetDeviceId(place_.device); + } +#endif #ifdef PADDLE_WITH_MKLDNN platform::AttachPointerHashToMKLDNNKey(this, place_); #endif diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h index f6c7bb04353654caab8e491bcbd3794c7ebddc73..f1d7bad90f232dbbbbb1d327df8747777007d096 100644 --- a/paddle/fluid/platform/event.h +++ 
b/paddle/fluid/platform/event.h @@ -141,6 +141,7 @@ class CudaEvent { #else cudaEventCreateWithFlags(&event_, flags_); #endif + VLOG(4) << "CudaEvent " << event_; } explicit CudaEvent(unsigned int flags) : flags_(flags) { @@ -149,6 +150,7 @@ class CudaEvent { #else cudaEventCreateWithFlags(&event_, flags_); #endif + VLOG(4) << "CudaEvent " << event_; } ~CudaEvent() { diff --git a/paddle/phi/backends/gpu/cuda/cuda_info.cc b/paddle/phi/backends/gpu/cuda/cuda_info.cc index 7be21e85f0005b9bfe7849ac6f12561cf108c7e3..4b5de3db54d1980287246bd252a627c7eb2f9840 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_info.cc +++ b/paddle/phi/backends/gpu/cuda/cuda_info.cc @@ -241,6 +241,7 @@ void SetDeviceId(int id) { id, GetGPUDeviceCount())); PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(id)); + VLOG(4) << "SetDeviceId " << id; } void GpuMemcpyAsync(void *dst,