diff --git a/paddle/fluid/memory/allocation/legacy_allocator.cc b/paddle/fluid/memory/allocation/legacy_allocator.cc
index 514ac7883ad2effdf3518be8afe3f448a5ac10b2..2ecb44ff15fec23e9b2a0045a959a2f6ed8a0a8c 100644
--- a/paddle/fluid/memory/allocation/legacy_allocator.cc
+++ b/paddle/fluid/memory/allocation/legacy_allocator.cc
@@ -29,6 +29,9 @@
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/string/printf.h"
 #include "paddle/fluid/string/split.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cuda_device_guard.h"
+#endif
 
 DEFINE_bool(init_allocated_mem, false,
             "It is a mistake that the values of the memory allocated by "
@@ -142,7 +145,6 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
   std::call_once(init_flag, [gpu_id]() {
     devices = platform::GetSelectedDevices();
     int gpu_num = devices.size();
-    allocation::GPUMemMonitor.Initialize(devices.size());
 
     a_arr = new BuddyAllocator *[gpu_num];
 
@@ -168,9 +170,9 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
                << ". Current 'FLAGS_reallocate_gpu_memory_in_mb' value is "
                << FLAGS_reallocate_gpu_memory_in_mb << "\n\n";
     }
+    platform::SetDeviceId(gpu_id);
   });
 
-  platform::SetDeviceId(gpu_id);
   auto pos = std::distance(devices.begin(),
                            std::find(devices.begin(), devices.end(), gpu_id));
   return a_arr[pos];
@@ -193,8 +195,7 @@ void *Alloc(const platform::CUDAPlace &place,
   auto *buddy_allocator = GetGPUBuddyAllocator(place.device);
   auto *ptr = buddy_allocator->Alloc(size);
   if (ptr == nullptr) {
-    int cur_dev = platform::GetCurrentDeviceId();
-    platform::SetDeviceId(place.device);
+    platform::CUDADeviceGuard guard(place.device);
     size_t avail, total;
     platform::GpuMemoryUsage(&avail, &total);
     LOG(FATAL) << "Cannot allocate " << string::HumanReadableSize(size)
@@ -206,7 +207,6 @@ void *Alloc(const platform::CUDAPlace &place,
                << string::HumanReadableSize(buddy_allocator->GetMaxChunkSize())
                << "GPU memory used: "
                << string::HumanReadableSize(Used(place));
-    platform::SetDeviceId(cur_dev);
   } else {
     if (FLAGS_benchmark) {
       allocation::GPUMemMonitor.Add(place.device, size);
diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc
index 41d79c5beb1367907a401b572d3d0eaf3a8ac67b..b0f48c455caf4606a4af63b54b6510f33f68894d 100644
--- a/paddle/fluid/memory/detail/system_allocator.cc
+++ b/paddle/fluid/memory/detail/system_allocator.cc
@@ -29,6 +29,9 @@ limitations under the License. */
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/gpu_info.h"
+#ifdef PADDLE_WITH_CUDA
+#include "paddle/fluid/platform/cuda_device_guard.h"
+#endif
 
 DECLARE_bool(use_pinned_memory);
 DECLARE_double(fraction_of_gpu_memory_to_use);
@@ -104,18 +107,11 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
   // CUDA documentation doesn't explain if cudaMalloc returns nullptr
   // if size is 0. We just make sure it does.
   if (size <= 0) return nullptr;
-  void* p;
-  int prev_id;
-  cudaGetDevice(&prev_id);
-  if (prev_id != gpu_id_) {
-    cudaSetDevice(gpu_id_);
-  }
-  cudaError_t result = cudaMalloc(&p, size);
+  paddle::platform::CUDADeviceGuard guard(gpu_id_);
 
-  if (prev_id != gpu_id_) {
-    cudaSetDevice(prev_id);
-  }
+  void* p;
+  cudaError_t result = cudaMalloc(&p, size);
 
   if (result == cudaSuccess) {
     *index = 0;
@@ -140,7 +136,6 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) {
 
 void GPUAllocator::Free(void* p, size_t size, size_t index) {
   cudaError_t err;
-
   if (index == 0) {
     PADDLE_ASSERT(gpu_alloc_size_ >= size);
     gpu_alloc_size_ -= size;
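
The substance of the patch is replacing the hand-rolled save/set/restore device dance (cudaGetDevice / cudaSetDevice around each allocation) with a scope-based RAII guard. Below is a minimal sketch of what such a guard looks like, written against the raw CUDA runtime API so it stands alone; the name ScopedDeviceGuard is an illustrative stand-in, while the real class is platform::CUDADeviceGuard in paddle/fluid/platform/cuda_device_guard.h and wraps Paddle's own device-id helpers.

#include <cuda_runtime.h>

// Sketch of an RAII device guard: switch to the target device on
// construction, restore the previous device on destruction.
class ScopedDeviceGuard {
 public:
  explicit ScopedDeviceGuard(int dev_id) {
    int prev_id = -1;
    cudaGetDevice(&prev_id);
    if (prev_id != dev_id) {
      prev_id_ = prev_id;     // remember which device to restore
      cudaSetDevice(dev_id);  // switch for the lifetime of the guard
    }
  }
  ~ScopedDeviceGuard() {
    // Restore the previous device on every exit path from the scope.
    if (prev_id_ != -1) cudaSetDevice(prev_id_);
  }
  // Non-copyable: a copy would restore the saved device twice.
  ScopedDeviceGuard(const ScopedDeviceGuard&) = delete;
  ScopedDeviceGuard& operator=(const ScopedDeviceGuard&) = delete;

 private:
  int prev_id_{-1};  // -1 means no switch happened, nothing to restore
};

One detail worth noting: the guard only holds while the object is alive, so it must be bound to a named variable. A statement like platform::CUDADeviceGuard(place.device); would construct and immediately destroy a temporary, leaving the current device unchanged for the statements that follow; hence the named guard in the Alloc error path above.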