[new-exec] set cuda device before run (#44985)

* set cuda device before run
* add header file
* fix compile

[new-exec] set cuda device before run (#44985)
* set cuda device before run
* add header file
* fix compile
68b06ba6 · Leo Chen · GitHub · 9c98ee3e · 68b06ba6 · 68b06ba6
3 changed files
--- a/paddle/fluid/framework/new_executor/interpretercore.cc
+++ b/paddle/fluid/framework/new_executor/interpretercore.cc
@@ -28,6 +28,7 @@
 #ifdef PADDLE_WITH_MKLDNN
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #endif
+#include "paddle/fluid/platform/device/gpu/gpu_info.h"

 PADDLE_DEFINE_EXPORTED_bool(new_executor_use_inplace,
                            true,
@@ -98,6 +99,11 @@ InterpreterCore::~InterpreterCore() {
 interpreter::CostInfo InterpreterCore::DryRun(
    const std::vector<std::string>& feed_names,
    const std::vector<framework::LoDTensor>& feed_tensors) {
+#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+  if (platform::is_gpu_place(place_)) {
+    platform::SetDeviceId(place_.device);
+  }
+#endif
  Prepare(feed_names, feed_tensors, true);
  interpreter::CostInfo cost_info;
  {
@@ -122,6 +128,11 @@ interpreter::CostInfo InterpreterCore::DryRun(
 paddle::framework::FetchList InterpreterCore::Run(
    const std::vector<std::string>& feed_names,
    const std::vector<framework::LoDTensor>& feed_tensors) {
+#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+  if (platform::is_gpu_place(place_)) {
+    platform::SetDeviceId(place_.device);
+  }
+#endif
 #ifdef PADDLE_WITH_MKLDNN
  platform::AttachPointerHashToMKLDNNKey(this, place_);
 #endif
@@ -153,6 +164,11 @@ paddle::framework::FetchList InterpreterCore::Run(

 paddle::framework::FetchList InterpreterCore::Run(
    const std::vector<std::string>& feed_names) {
+#if defined(PADDLE_WITH_CUDA) && defined(PADDLE_WITH_HETERPS)
+  if (platform::is_gpu_place(place_)) {
+    platform::SetDeviceId(place_.device);
+  }
+#endif
 #ifdef PADDLE_WITH_MKLDNN
  platform::AttachPointerHashToMKLDNNKey(this, place_);
 #endif

--- a/paddle/fluid/platform/event.h
+++ b/paddle/fluid/platform/event.h
@@ -141,6 +141,7 @@ class CudaEvent {
 #else
    cudaEventCreateWithFlags(&event_, flags_);
 #endif
+    VLOG(4) << "CudaEvent " << event_;
  }

  explicit CudaEvent(unsigned int flags) : flags_(flags) {
@@ -149,6 +150,7 @@ class CudaEvent {
 #else
    cudaEventCreateWithFlags(&event_, flags_);
 #endif
+    VLOG(4) << "CudaEvent " << event_;
  }

  ~CudaEvent() {

--- a/paddle/phi/backends/gpu/cuda/cuda_info.cc
+++ b/paddle/phi/backends/gpu/cuda/cuda_info.cc
@@ -241,6 +241,7 @@ void SetDeviceId(int id) {
                                   id,
                                   GetGPUDeviceCount()));
  PADDLE_RETRY_CUDA_SUCCESS(cudaSetDevice(id));
+  VLOG(4) << "SetDeviceId " << id;
 }

 void GpuMemcpyAsync(void *dst,