diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index ecbce3b6102f47100736dbb16e3c30f297c601d4..66e8f93736a5b1faf1a217af7e84cb93615d0cd9 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -28,6 +28,7 @@ #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif +#include "paddle/fluid/platform/device/gpu/gpu_info.h" PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace, true, @@ -98,6 +99,11 @@ InterpreterCore::~InterpreterCore() { interpreter::CostInfo InterpreterCore::DryRun( const std::vector<std::string>& feed_names, const std::vector<phi::DenseTensor>& feed_tensors) { +#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS) + if (platform::is_gpu_place(place_)) { + platform::SetDeviceId(place_.device); + } +#endif Prepare(feed_names, feed_tensors, true); interpreter::CostInfo cost_info; { @@ -122,6 +128,11 @@ interpreter::CostInfo InterpreterCore::DryRun( paddle::framework::FetchList InterpreterCore::Run( const std::vector<std::string>& feed_names, const std::vector<phi::DenseTensor>& feed_tensors) { +#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS) + if (platform::is_gpu_place(place_)) { + platform::SetDeviceId(place_.device); + } +#endif #ifdef PADDLE_WITH_MKLDNN platform::AttachPointerHashToMKLDNNKey(this, place_); #endif @@ -153,6 +164,11 @@ paddle::framework::FetchList InterpreterCore::Run( paddle::framework::FetchList InterpreterCore::Run( const std::vector<std::string>& feed_names) { +#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS) + if (platform::is_gpu_place(place_)) { + platform::SetDeviceId(place_.device); + } +#endif #ifdef PADDLE_WITH_MKLDNN platform::AttachPointerHashToMKLDNNKey(this, place_); #endif diff --git a/paddle/fluid/platform/event.h b/paddle/fluid/platform/event.h index f6c7bb04353654caab8e491bcbd3794c7ebddc73..f1d7bad90f232dbbbbb1d327df8747777007d096 100644 --- a/paddle/fluid/platform/event.h +++ 
b/paddle/fluid/platform/event.h @@ -141,6 +141,7 @@ class CudaEvent { #else cudaEventCreateWithFlags(&event_, flags_); #endif + VLOG(4) << "CudaEvent " << event_; } explicit CudaEvent(unsigned int flags) : flags_(flags) { @@ -149,6 +150,7 @@ class CudaEvent { #else cudaEventCreateWithFlags(&event_, flags_); #endif + VLOG(4) << "CudaEvent " << event_; } ~CudaEvent() { diff --git a/paddle/phi/backends/gpu/cuda/cuda_info.cc b/paddle/phi/backends/gpu/cuda/cuda_info.cc index 7be21e85f0005b9bfe7849ac6f12561cf108c7e3..4b5de3db54d1980287246bd252a627c7eb2f9840 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_info.cc +++ b/paddle/phi/backends/gpu/cuda/cuda_info.cc @@ -241,6 +241,7 @@ void SetDeviceId(int id) { id, GetGPUDeviceCount())); PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(id)); + VLOG(4) << "SetDeviceId " << id; } void GpuMemcpyAsync(void *dst,