diff --git a/paddle/fluid/framework/details/op_handle_base.cc b/paddle/fluid/framework/details/op_handle_base.cc index 25b5eefc05cda306bb040dda6a9ad2fd478a23a0..fe21a62efd0874bba8a039e07ff90fb789daa731 100644 --- a/paddle/fluid/framework/details/op_handle_base.cc +++ b/paddle/fluid/framework/details/op_handle_base.cc @@ -110,9 +110,7 @@ void OpHandleBase::InitXPU() { "%s should have only one dev_ctx.", Name())); auto &place = dev_ctxes_.begin()->first; int dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device; - PADDLE_ENFORCE_EQ( - xpu_set_device(dev_id), XPU_SUCCESS, - platform::errors::PreconditionNotMet("xpu_set_device failed")); + platform::SetXPUDeviceId(dev_id); for (auto &out_var : outputs_) { auto *out_var_handle = dynamic_cast(out_var); if (out_var_handle) { diff --git a/paddle/fluid/framework/heterxpu_trainer.cc b/paddle/fluid/framework/heterxpu_trainer.cc index 93b7869cc1d25053b4473f16eb2b0d63d4a2f3d0..ebd737c2d5794a3ecc886aa7f10ab9c008441f4a 100644 --- a/paddle/fluid/framework/heterxpu_trainer.cc +++ b/paddle/fluid/framework/heterxpu_trainer.cc @@ -122,7 +122,8 @@ void HeterXpuTrainer::CreateThreadParam(const ProgramDesc& program, int num) { #endif #ifdef PADDLE_WITH_XPU - xpu_set_device(BOOST_GET_CONST(platform::XPUPlace, place).device); + auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device; + platform::XPUDeviceGuard guard(dev_id); #endif auto& block = program.Block(0); @@ -343,7 +344,8 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, #endif #ifdef PADDLE_WITH_XPU auto place = thread_tensor->place(); - xpu_set_device(BOOST_GET_CONST(platform::XPUPlace, place).device); + auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device; + platform::XPUDeviceGuard guard(dev_id); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContext* dev_ctx = pool.Get(place); @@ -370,7 +372,8 @@ int HeterXpuTrainer::EndPass(const HeterRequest* request, #endif #ifdef PADDLE_WITH_XPU auto place = root_tensor->place(); - xpu_set_device(BOOST_GET_CONST(platform::XPUPlace, place).device); + auto dev_id = BOOST_GET_CONST(platform::XPUPlace, place).device; + platform::XPUDeviceGuard guard(dev_id); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContext* dev_ctx = pool.Get(place); @@ -416,7 +419,7 @@ int HeterXpuTrainer::RunTask(const HeterRequest* request, std::shared_ptr context = object_pool_.Get(); if (!context->scope_) { - int num = rand() % places_.size(); + int num = rand_r() % places_.size(); context->place_num_ = num; auto place = places_[num]; context->scope_ = &(place_scopes_[num]->NewScope()); diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index d2319dacdd33f8ddd9eb1df625c76cd4b7e59b96..8710bbe6ce98bfb0d6e2f141359ecafc6ee22689 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -29,15 +29,7 @@ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/cuda_device_guard.h" #endif -#ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/device/xpu/xpu_header.h" -#endif -#ifdef PADDLE_WITH_ASCEND_CL -#include "paddle/fluid/platform/device/npu/npu_info.h" -#endif -#ifdef PADDLE_WITH_MLU -#include "paddle/fluid/platform/device/mlu/mlu_info.h" -#endif +#include "paddle/fluid/platform/device/device_wrapper.h" PADDLE_DEFINE_EXPORTED_bool( init_allocated_mem, false, @@ -153,24 
+145,9 @@ void *Alloc(const platform::XPUPlace &place, size_t size) { #ifdef PADDLE_WITH_XPU VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); void *p = nullptr; - int dev_id = -1; - int ret = xpu_current_device(&dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - if (dev_id >= 64) { - // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id - dev_id -= 64; - } - ret = xpu_set_device(place.device); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - ret = xpu_malloc(reinterpret_cast<void **>(&p), size); + + platform::XPUDeviceGuard guard(place.device); + int ret = xpu_malloc(reinterpret_cast<void **>(&p), size); if (ret != XPU_SUCCESS) { std::cout << "xpu memory malloc(" << size << ") failed, try again\n"; xpu_wait(); @@ -184,12 +161,6 @@ void *Alloc(const platform::XPUPlace &place, size_t size) { PADDLE_THROW(platform::errors::Unimplemented( "xpu memory FLAGS_init_allocated_mem is not implemented.")); } - ret = xpu_set_device(dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); VLOG(10) << " pointer=" << p; return p; #else @@ -205,30 +176,9 @@ void Free(const platform::XPUPlace &place, void *p, #ifdef PADDLE_WITH_XPU VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); - int dev_id = -1; - int ret = xpu_current_device(&dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - if (dev_id >= 64) { - // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id - dev_id -= 64; - } - ret = xpu_set_device(place.device); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); + + platform::XPUDeviceGuard guard(place.device); xpu_free(p); - ret = xpu_set_device(dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); #else PADDLE_THROW( platform::errors::PermissionDenied("'XPUPlace' is not supported.")); diff --git a/paddle/fluid/memory/detail/system_allocator_test.cc b/paddle/fluid/memory/detail/system_allocator_test.cc index d818459fb03a0a0e442c35b67c744b3e124c2e83..dbf3fad6c3373b2a616bfad2cffb63e9ad038c9c 100644 --- a/paddle/fluid/memory/detail/system_allocator_test.cc +++ b/paddle/fluid/memory/detail/system_allocator_test.cc @@ -19,12 +19,7 @@ limitations under the License.
*/ #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/allocator.h" -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#endif -#ifdef PADDLE_WITH_MLU -#include "paddle/fluid/platform/device/mlu/enforce.h" -#endif +#include "paddle/fluid/platform/device/device_wrapper.h" DECLARE_bool(use_pinned_memory); diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index 2814f2f9501a8bef01526e9b9bc89e7d63fdca11..4a10922adbf757355e8f6879480be6720afe504b 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -14,18 +14,10 @@ limitations under the License. */ #include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" -#ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/device/xpu/xpu_header.h" -#endif - -#ifdef PADDLE_WITH_MLU -#include "paddle/fluid/platform/device/mlu/mlu_info.h" -#endif - namespace paddle { namespace memory { @@ -74,41 +66,7 @@ void Copy(platform::XPUPlace dst_place, VLOG(1) << "memcpy XPU_HOST_TO_DEVICE size <= 0 (" << num << ")"; return; } - int dev_id = -1; - int ret = xpu_current_device(&dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - if (dev_id >= 64) { - // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id - dev_id -= 64; - } - if (dev_id != dst_place.device) { - ret = xpu_set_device(dst_place.device); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - } - ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_HOST_TO_DEVICE); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - if (dev_id != dst_place.device) { - ret = xpu_set_device(dev_id); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - } + platform::MemcpySyncH2D(dst, src, num, dst_place.device); } template <> @@ -120,46 +78,7 @@ void Copy(platform::CPUPlace dst_place, VLOG(1) << "memcpy XPU_DEVICE_TO_HOST size <= 0 (" << num << ")"; return; } - int dev_id = -1; - int ret = xpu_current_device(&dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - if (dev_id >= 64) { - // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id - dev_id -= 64; - } - if (dev_id != src_place.device) { - ret = xpu_set_device(src_place.device); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - } - - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto* dev_ctx = pool.GetByPlace(src_place); - dev_ctx->Wait(); - - ret = xpu_memcpy(dst, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - 
platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - if (dev_id != src_place.device) { - ret = xpu_set_device(dev_id); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - } + platform::MemcpySyncD2H(dst, src, num, src_place.device); } template <> @@ -171,69 +90,7 @@ void Copy(platform::XPUPlace dst_place, VLOG(1) << "memcpy XPU_DEVICE_TO_DEVICE size <= 0 (" << num << ")"; return; } - int dev_id = -1; - int ret = xpu_current_device(&dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - if (dev_id >= 64) { - // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id - dev_id -= 64; - } - if (dev_id != src_place.device || dev_id != dst_place.device) { - ret = xpu_set_device(src_place.device); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - void* tmp = malloc(num); - - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto* dev_ctx = pool.GetByPlace(src_place); - dev_ctx->Wait(); - - ret = xpu_memcpy(tmp, src, num, XPUMemcpyKind::XPU_DEVICE_TO_HOST); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - ret = xpu_set_device(dst_place.device); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - ret = xpu_memcpy(dst, tmp, num, XPUMemcpyKind::XPU_HOST_TO_DEVICE); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - ret = xpu_set_device(dev_id); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - free(tmp); - } else { - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - auto* dev_ctx = pool.GetByPlace(src_place); - int ret = xpu::copy(dev_ctx->x_context(), static_cast(src), - static_cast(dst), num); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, platform::errors::External( - "XPU API return wrong value[%d %s]", - ret, XPUAPIErrorMsg[ret])); - } + platform::MemcpySyncD2D(dst, dst_place.device, src, src_place.device, num); } #endif diff --git a/paddle/fluid/operators/masked_select_op_xpu.cc b/paddle/fluid/operators/masked_select_op_xpu.cc index d86ad8f89b9fd96dc47af5b5bbde295cfe78690d..aafc2510a8c44e7bd73261c7ef44227325a8e6b2 100644 --- a/paddle/fluid/operators/masked_select_op_xpu.cc +++ b/paddle/fluid/operators/masked_select_op_xpu.cc @@ -12,6 +12,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/masked_select_op.h" +#include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { namespace operators { @@ -41,13 +42,8 @@ class MaskedSelectXPUKernel : public framework::OpKernel { int* out_size = RAII_GUARD.alloc_l3_or_gm(1); int out_size_cpu; - int ret = xpu::nonzero_count(dev_ctx.x_context(), mask_data, out_size, - mask->numel()); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU nonzero_count kernel return wrong value[%d %s]", - ret, XPUAPIErrorMsg[ret])); - + PADDLE_ENFORCE_XPU_SUCCESS(xpu::nonzero_count( + dev_ctx.x_context(), mask_data, out_size, mask->numel())); memory::Copy(platform::CPUPlace(), static_cast(&out_size_cpu), BOOST_GET_CONST(platform::XPUPlace, mask->place()), static_cast(out_size), sizeof(int32_t)); @@ -59,12 +55,9 @@ class MaskedSelectXPUKernel : public framework::OpKernel { auto input_shape = framework::vectorize(input_dim); auto mask_shape = framework::vectorize(mask_dim); - ret = xpu::masked_select(dev_ctx.x_context(), input_data, mask_data, - out_data, input_shape, mask_shape); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU masked_select kernel return wrong value[%d %s]", - ret, XPUAPIErrorMsg[ret])); + PADDLE_ENFORCE_XPU_SUCCESS( + xpu::masked_select(dev_ctx.x_context(), input_data, mask_data, out_data, + input_shape, mask_shape)); } }; diff --git a/paddle/fluid/platform/collective_helper.cc b/paddle/fluid/platform/collective_helper.cc index 25f8f3ed9f3d8eba7f746c5b38d793d7cc2b0d39..7d2ea57545d084c6ba795db60b0fe7b82c3c6a9f 100644 --- a/paddle/fluid/platform/collective_helper.cc +++ b/paddle/fluid/platform/collective_helper.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/platform/collective_helper.h" #include +#include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/gpu/gpu_resource_pool.h" namespace paddle { @@ -292,17 +293,8 @@ BKCLComm* BKCLCommContext::CreateComm(BKCLUniqueId* bkcl_id, int nranks, "Expected dev_id >= 0. But received dev_id is %d.", dev_id)); BKCLContext_t comm = nullptr; - auto ret = xpu_set_device(dev_id); - PADDLE_ENFORCE_EQ( - ret, XPU_SUCCESS, - platform::errors::PreconditionNotMet( - "XPU API return wrong value[%d %s], please check whether " - "Baidu Kunlun Card is properly installed.", - ret, XPUAPIErrorMsg[ret])); - ret = bkcl_init_rank(&comm, rank, nranks, bkcl_id); - PADDLE_ENFORCE_EQ(ret, BKCL_SUCCESS, - platform::errors::PreconditionNotMet( - "bkcl_init_rank failed, got wrong value [%d].", ret)); + platform::SetXPUDeviceId(dev_id); + PADDLE_ENFORCE_XPU_SUCCESS(bkcl_init_rank(&comm, rank, nranks, bkcl_id)); auto* comm_wrapper = AssignBKCLComm(comm, nranks, rank, dev_id, ring_id); diff --git a/paddle/fluid/platform/device/device_wrapper.h b/paddle/fluid/platform/device/device_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..43408ca207d1d2c10ba29b32b487e8a7ea99917f --- /dev/null +++ b/paddle/fluid/platform/device/device_wrapper.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. */ + +/**************************** Enforce Wrapper **************************/ + +#pragma once + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#endif + +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/device/xpu/enforce_xpu.h" +#include "paddle/fluid/platform/device/xpu/xpu_info.h" +#endif + +#ifdef PADDLE_WITH_ASCEND_CL +#include "paddle/fluid/platform/device/npu/enforce_npu.h" +#include "paddle/fluid/platform/device/npu/npu_info.h" +#endif + +#ifdef PADDLE_WITH_MLU +#include "paddle/fluid/platform/device/mlu/enforce.h" +#include "paddle/fluid/platform/device/mlu/mlu_info.h" +#endif diff --git a/paddle/fluid/platform/device/xpu/CMakeLists.txt b/paddle/fluid/platform/device/xpu/CMakeLists.txt index 17f492f93e534606bc4cf3f412d9badf0d6c45a7..b1fc9a0cedd0bbcebf96313e7a0643216c187446 100644 --- a/paddle/fluid/platform/device/xpu/CMakeLists.txt +++ b/paddle/fluid/platform/device/xpu/CMakeLists.txt @@ -6,3 +6,5 @@ set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl) cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib) cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context) + +add_subdirectory(tests) diff --git a/paddle/fluid/platform/device/xpu/bkcl_helper.h b/paddle/fluid/platform/device/xpu/bkcl_helper.h index cccee157194881b12f2baaa1249aed69a8e1f20d..d9ffbfe011f9127f13639bd5b8c497fc00438a4f 100644 --- a/paddle/fluid/platform/device/xpu/bkcl_helper.h +++ b/paddle/fluid/platform/device/xpu/bkcl_helper.h @@ -26,8 +26,8 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/device/xpu/enforce_xpu.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/place.h" #include "xpu/bkcl.h" @@ -73,13 +73,9 @@ struct InitBKCLPara { static void *init_bkcl_context_func(void *args) { struct InitBKCLPara *para = (struct InitBKCLPara *)args; - PADDLE_ENFORCE_EQ(xpu_set_device(para->dev_id), XPU_SUCCESS, - platform::errors::PreconditionNotMet( - "xpu_set_device failed[%d]", para->dev_id)); - PADDLE_ENFORCE_EQ( - bkcl_init_rank(para->ctx, para->rank, para->nranks, para->bkcl_id), - BKCL_SUCCESS, - platform::errors::PreconditionNotMet("bkcl_init_rank failed")); + platform::SetXPUDeviceId(para->dev_id); + PADDLE_ENFORCE_XPU_SUCCESS( + bkcl_init_rank(para->ctx, para->rank, para->nranks, para->bkcl_id)); return nullptr; } diff --git a/paddle/fluid/platform/device/xpu/enforce_xpu.h b/paddle/fluid/platform/device/xpu/enforce_xpu.h new file mode 100644 index 0000000000000000000000000000000000000000..839f14067782dd43bc922920beec80560728dafd --- /dev/null +++ b/paddle/fluid/platform/device/xpu/enforce_xpu.h @@ -0,0 +1,160 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/fluid/platform/device/xpu/xpu_header.h" +#include "paddle/fluid/platform/enforce.h" +#include "xpu/bkcl.h" + +namespace paddle { +namespace platform { + +// Note: XPU runtime API returns int, not XPUError_t +inline const char* xpuGetErrorString(int stat) { + switch (stat) { + case XPU_SUCCESS: + return "Success"; + case XPUERR_INVALID_DEVICE: + return "Invalid XPU device"; + case XPUERR_UNINIT: + return "XPU runtime not properly inited"; + case XPUERR_NOMEM: + return "Device memory not enough"; + case XPUERR_NOCPUMEM: + return "CPU memory not enough"; + case XPUERR_INVALID_PARAM: + return "Invalid parameter"; + case XPUERR_NOXPUFUNC: + return "Cannot get XPU Func"; + case XPUERR_LDSO: + return "Error loading dynamic library"; + case XPUERR_LDSYM: + return "Error loading func from dynamic library"; + case XPUERR_SIMULATOR: + return "Error from XPU Simulator"; + case XPUERR_NOSUPPORT: + return "Operation not supported"; + case XPUERR_ABNORMAL: + return "Device abnormal due to previous error"; + case XPUERR_KEXCEPTION: + return "Exception in kernel execution"; + case XPUERR_TIMEOUT: + return "Kernel execution timed out"; + case XPUERR_BUSY: + return "Resource busy"; + case XPUERR_USEAFCLOSE: + return "Use a stream after closed"; + case XPUERR_UCECC: + return "Uncorrectable ECC"; + case XPUERR_OVERHEAT: + return "Overheat"; + case XPUERR_UNEXPECT: + return "Execution error, reach unexpected control flow"; + case XPUERR_DEVRESET: + return "Device is being reset, try again later"; + case XPUERR_HWEXCEPTION: + return "Hardware module exception"; + case XPUERR_HBM_INIT: + return "Error init HBM"; + case XPUERR_DEVINIT: + return "Error init device"; + case XPUERR_PEERRESET: + return "Device is being reset, try again later"; + case XPUERR_MAXDEV: + return "Device count exceed limit"; + case XPUERR_NOIOC: + return "Unknown IOCTL command"; + case XPUERR_DMATIMEOUT: + return "DMA timed out, a reboot maybe needed"; + case XPUERR_DMAABORT: + return "DMA aborted due to error, possibly wrong address or hardware " + "state"; + case XPUERR_MCUUNINIT: + return "Firmware not initialized"; + case XPUERR_OLDFW: + return "Firmware version too old (<15), please update."; + case XPUERR_PCIE: + return "Error in PCIE"; + case XPUERR_FAULT: + return "Error copy between kernel and user space"; + case XPUERR_INTERRUPTED: + return "Execution interrupted by user"; + default: + return "unknown error"; + } +} + +inline const char* bkclGetErrorString(BKCLResult_t stat) { + switch (stat) { + case BKCL_SUCCESS: + return "BKCL_SUCCESS"; + case BKCL_INVALID_ARGUMENT: + return "BKCL_INVALID_ARGUMENT"; + case BKCL_RUNTIME_ERROR: + return "BKCL_RUNTIME_ERROR"; + case BKCL_SYSTEM_ERROR: + return "BKCL_SYSTEM_ERROR"; + case BKCL_INTERNAL_ERROR: + return "BKCL_INTERNAL_ERROR"; + default: + return "Unknown BKCL status"; + } +} + +inline std::string build_xpu_error_msg(int stat) { + std::string msg("XPU Error <" + std::to_string(stat) + ">, "); + return msg + xpuGetErrorString(stat) + " "; +} + +inline std::string build_xpu_error_msg(BKCLResult_t stat) { + std::string msg("BKCL Error, "); + return msg + bkclGetErrorString(stat) + " "; +} + +namespace details { + +template <typename T> +struct ExternalApiType {}; + +#define DEFINE_EXTERNAL_API_TYPE(type, success_value) \ + template <> \ + struct ExternalApiType<type> { \ + using Type = type; \ + static constexpr Type kSuccess = success_value; \ + } + +DEFINE_EXTERNAL_API_TYPE(int, XPU_SUCCESS); +DEFINE_EXTERNAL_API_TYPE(BKCLResult_t, BKCL_SUCCESS); + +#undef
DEFINE_EXTERNAL_API_TYPE + +} // namespace details + +#define PADDLE_ENFORCE_XPU_SUCCESS(COND) \ + do { \ + auto __cond__ = (COND); \ + using __XPU_STATUS_TYPE__ = decltype(__cond__); \ + constexpr auto __success_type__ = \ + ::paddle::platform::details::ExternalApiType< \ + __XPU_STATUS_TYPE__>::kSuccess; \ + if (UNLIKELY(__cond__ != __success_type__)) { \ + auto __summary__ = paddle::platform::errors::External( \ + ::paddle::platform::build_xpu_error_msg(__cond__)); \ + __THROW_ERROR_INTERNAL__(__summary__); \ + } \ + } while (0) + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/device/xpu/tests/CMakeLists.txt b/paddle/fluid/platform/device/xpu/tests/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..6d98fefcf831716b10f5fcbc0141d1656898d2e8 --- /dev/null +++ b/paddle/fluid/platform/device/xpu/tests/CMakeLists.txt @@ -0,0 +1 @@ +cc_test(enforce_xpu_test SRCS enforce_xpu_test.cc DEPS stringpiece) diff --git a/paddle/fluid/platform/device/xpu/tests/enforce_xpu_test.cc b/paddle/fluid/platform/device/xpu/tests/enforce_xpu_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..730bcdb37fd7b7018583793648fdce8be2eb07db --- /dev/null +++ b/paddle/fluid/platform/device/xpu/tests/enforce_xpu_test.cc @@ -0,0 +1,116 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/platform/device/xpu/enforce_xpu.h" +#include "gtest/gtest.h" + +template +bool CheckXPUStatusSuccess(T value, const std::string& msg = "success") { + PADDLE_ENFORCE_XPU_SUCCESS(value); + return true; +} + +template +bool CheckXPUStatusFailure(T value, const std::string& msg) { + try { + PADDLE_ENFORCE_XPU_SUCCESS(value); + return false; + } catch (paddle::platform::EnforceNotMet& error) { + std::string ex_msg = error.what(); + std::cout << ex_msg << std::endl; + return ex_msg.find(msg) != std::string::npos; + } +} + +TEST(enforce, xpu_status) { + EXPECT_TRUE(CheckXPUStatusSuccess(static_cast(XPU_SUCCESS))); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_INVALID_DEVICE), + "Invalid XPU device")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_UNINIT), + "XPU runtime not properly inited")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_NOMEM), + "Device memory not enough")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_NOCPUMEM), + "CPU memory not enough")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_INVALID_PARAM), + "Invalid parameter")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_NOXPUFUNC), + "Cannot get XPU Func")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_LDSO), + "Error loading dynamic library")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_LDSYM), + "Error loading func from dynamic library")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_SIMULATOR), + "Error from XPU Simulator")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_NOSUPPORT), + "Operation not supported")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_ABNORMAL), + "Device abnormal due to previous error")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_KEXCEPTION), + "Exception in kernel execution")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_TIMEOUT), + "Kernel execution timed out")); + EXPECT_TRUE( + CheckXPUStatusFailure(static_cast(XPUERR_BUSY), "Resource busy")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_USEAFCLOSE), + "Use a stream after closed")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_UCECC), + "Uncorrectable ECC")); + EXPECT_TRUE( + CheckXPUStatusFailure(static_cast(XPUERR_OVERHEAT), "Overheat")); + EXPECT_TRUE( + CheckXPUStatusFailure(static_cast(XPUERR_UNEXPECT), + "Execution error, reach unexpected control flow")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_DEVRESET), + "Device is being reset, try again later")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_HWEXCEPTION), + "Hardware module exception")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_HBM_INIT), + "Error init HBM")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_DEVINIT), + "Error init device")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_PEERRESET), + "Device is being reset, try again later")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_MAXDEV), + "Device count exceed limit")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_NOIOC), + "Unknown IOCTL command")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_DMATIMEOUT), + "DMA timed out, a reboot maybe needed")); + EXPECT_TRUE(CheckXPUStatusFailure( + static_cast(XPUERR_DMAABORT), + "DMA aborted due to error, possibly wrong address or hardware state")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_MCUUNINIT), + "Firmware not initialized")); + EXPECT_TRUE( + CheckXPUStatusFailure(static_cast(XPUERR_OLDFW), + "Firmware 
version too old (<15), please update.")); + EXPECT_TRUE( + CheckXPUStatusFailure(static_cast(XPUERR_PCIE), "Error in PCIE")); + EXPECT_TRUE( + CheckXPUStatusFailure(static_cast(XPUERR_FAULT), + "Error copy between kernel and user space")); + EXPECT_TRUE(CheckXPUStatusFailure(static_cast(XPUERR_INTERRUPTED), + "Execution interrupted by user")); +} + +TEST(enforce, bkcl_status) { + EXPECT_TRUE(CheckXPUStatusSuccess(BKCL_SUCCESS)); + EXPECT_TRUE( + CheckXPUStatusFailure(BKCL_INVALID_ARGUMENT, "BKCL_INVALID_ARGUMENT")); + EXPECT_TRUE(CheckXPUStatusFailure(BKCL_RUNTIME_ERROR, "BKCL_RUNTIME_ERROR")); + EXPECT_TRUE(CheckXPUStatusFailure(BKCL_SYSTEM_ERROR, "BKCL_SYSTEM_ERROR")); + EXPECT_TRUE( + CheckXPUStatusFailure(BKCL_INTERNAL_ERROR, "BKCL_INTERNAL_ERROR")); +} diff --git a/paddle/fluid/platform/device/xpu/xpu_header.h b/paddle/fluid/platform/device/xpu/xpu_header.h index fe75290c252dfdf2347a7d261ece204198d110c7..1177fd63742b3b4f104c6943a3e59022677f26d9 100644 --- a/paddle/fluid/platform/device/xpu/xpu_header.h +++ b/paddle/fluid/platform/device/xpu/xpu_header.h @@ -1,16 +1,16 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ #pragma once @@ -21,37 +21,14 @@ #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/float16.h" + #include "xpu/runtime.h" #include "xpu/runtime_ex.h" #include "xpu/xdnn.h" namespace xpu = baidu::xpu::api; -class XPUActHelper { - public: - // Convert string to activation type in xpu - static xpu::Activation_t ConvertToXpuActType( - const std::string& act_type_str) { - static std::unordered_map str2act = { - {"linear", xpu::Activation_t::LINEAR}, - {"relu", xpu::Activation_t::RELU}, - {"sigmoid", xpu::Activation_t::SIGMOID}, - {"tanh", xpu::Activation_t::TANH}, - {"gelu", xpu::Activation_t::GELU}, - {"leaky_relu", xpu::Activation_t::LEAKY_RELU}, - {"sqrt", xpu::Activation_t::SQRT}, - {"square", xpu::Activation_t::SQUARE}}; - - auto res = str2act.find(act_type_str); - PADDLE_ENFORCE_NE(res, str2act.end(), - paddle::platform::errors::InvalidArgument( - "Invalid activation type(%s) in XPU", act_type_str)); - return res->second; - } -}; - static std::map XPUAPIErrorMsg = { {xpu::Error_t::SUCCESS, "xpu api success"}, {xpu::Error_t::INVALID_PARAM, "xpu api invalid param"}, diff --git a/paddle/fluid/platform/device/xpu/xpu_info.cc b/paddle/fluid/platform/device/xpu/xpu_info.cc index adc8bcc22da98b7f119588f36cb12af85638779d..483b1c5ce2795f13e5233d0e0aed2a6e0d198a21 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.cc +++ b/paddle/fluid/platform/device/xpu/xpu_info.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include #include "gflags/gflags.h" +#include "paddle/fluid/platform/device/xpu/enforce_xpu.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/string/split.h" PADDLE_DEFINE_EXPORTED_string( @@ -31,7 +31,31 @@ PADDLE_DEFINE_EXPORTED_string( namespace paddle { namespace platform { -static int GetXPUDeviceCountImpl() { +/**************************** Version Management **************************/ + +//! Get the version of XPU Driver +int GetDriverVersion() { + uint32_t driver_version_major = 0; + uint32_t driver_version_minor = 0; + PADDLE_ENFORCE_XPU_SUCCESS( + xpu_get_driver_version(&driver_version_major, &driver_version_minor)); + int driver_version = driver_version_major * 10 + driver_version_minor; + return driver_version; +} + +//! 
Get the version of XPU Runtime +int GetRuntimeVersion() { + uint32_t runtime_version_major = 0; + uint32_t runtime_version_minor = 0; + PADDLE_ENFORCE_XPU_SUCCESS( + xpu_get_runtime_version(&runtime_version_major, &runtime_version_minor)); + int runtime_version = runtime_version_major * 10 + runtime_version_minor; + return runtime_version; +} + +/**************************** Device Management **************************/ + +static int GetDeviceCountImpl() { const auto *xpu_visible_devices = std::getenv("XPU_VISIBLE_DEVICES"); if (xpu_visible_devices != nullptr) { std::string xpu_visible_devices_str(xpu_visible_devices); @@ -44,29 +68,18 @@ static int GetXPUDeviceCountImpl() { } int count = 0; - int ret = xpu_device_count(&count); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); + PADDLE_ENFORCE_XPU_SUCCESS(xpu_device_count(&count)); return count; } int GetXPUDeviceCount() { - static auto dev_cnt = GetXPUDeviceCountImpl(); + static auto dev_cnt = GetDeviceCountImpl(); return dev_cnt; } int GetXPUCurrentDeviceId() { int dev_id; - int ret = xpu_current_device(&dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - + PADDLE_ENFORCE_XPU_SUCCESS(xpu_current_device(&dev_id)); if (dev_id >= 64) { // if dev_id >= 64, the device is a simulator device, -64 to get real dev_id dev_id -= 64; @@ -74,6 +87,13 @@ int GetXPUCurrentDeviceId() { return dev_id; } +void SetXPUDeviceId(int id) { + PADDLE_ENFORCE_LT( + id, GetXPUDeviceCount(), + platform::errors::InvalidArgument("id must be less than XPU count")); + PADDLE_ENFORCE_XPU_SUCCESS(xpu_set_device(id)); +} + //! Get a list of device ids from environment variable or use all. std::vector<int> GetXPUSelectedDevices() { // use user specified XPUs in single-node multi-process mode.
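Throughout this patch the hand-written save/set/restore sequences around xpu_set_device are replaced either by platform::SetXPUDeviceId (added just above) or by the RAII platform::XPUDeviceGuard. The guard's constructor and destructor bodies are not part of this diff, so the following is only a minimal sketch of the usual shape of such a guard, written against the helpers declared in xpu_info.h; it is an illustration under those assumptions, not the exact Paddle implementation.

// Illustrative sketch of an RAII device guard: remember the caller's device,
// switch to the requested one, and restore the original device when the
// scope ends, including when an exception is thrown inside the scope.
class XPUDeviceGuardSketch {
 public:
  explicit XPUDeviceGuardSketch(int dev_id) {
    prev_id_ = GetXPUCurrentDeviceId();  // device active before the guard
    if (prev_id_ != dev_id) {
      SetXPUDeviceId(dev_id);            // switch for the guarded scope
      switched_ = true;
    }
  }
  ~XPUDeviceGuardSketch() {
    if (switched_) {
      SetXPUDeviceId(prev_id_);          // restore on every exit path
    }
  }
  XPUDeviceGuardSketch(const XPUDeviceGuardSketch &) = delete;
  XPUDeviceGuardSketch &operator=(const XPUDeviceGuardSketch &) = delete;

 private:
  int prev_id_{-1};
  bool switched_{false};
};

Because the restore runs in a destructor, a failing XPU call inside the guarded scope, which now throws via PADDLE_ENFORCE_XPU_SUCCESS, can no longer leave the thread pinned to the wrong device; the removed manual restore code gave no such guarantee on the error paths.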
@@ -92,24 +112,38 @@ std::vector GetXPUSelectedDevices() { return devices; } -void SetXPUDeviceId(int id) { - PADDLE_ENFORCE_LT( - id, GetXPUDeviceCount(), - platform::errors::InvalidArgument("id must less than XPU count")); - int ret = xpu_set_device(id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); +/**************************** Memory Management **************************/ + +void MemcpySyncH2D(void *dst, const void *src, size_t count, int dev_id) { + platform::XPUDeviceGuard guard(dev_id); + PADDLE_ENFORCE_XPU_SUCCESS( + xpu_memcpy(dst, src, count, XPUMemcpyKind::XPU_HOST_TO_DEVICE)); +} + +void MemcpySyncD2H(void *dst, const void *src, size_t count, int dev_id) { + platform::XPUDeviceGuard guard(dev_id); + PADDLE_ENFORCE_XPU_SUCCESS( + xpu_memcpy(dst, src, count, XPUMemcpyKind::XPU_DEVICE_TO_HOST)); +} + +void MemcpySyncD2D(void *dst, int dst_id, const void *src, int src_id, + size_t count) { + int dev_id = GetXPUCurrentDeviceId(); + if (dst_id == dev_id && src_id == dev_id) { + platform::XPUDeviceGuard guard(dev_id); + PADDLE_ENFORCE_XPU_SUCCESS( + xpu_memcpy(dst, src, count, XPUMemcpyKind::XPU_DEVICE_TO_DEVICE)); + } else { + PADDLE_ENFORCE_XPU_SUCCESS( + xpu_memcpy_peer(dst_id, dst, src_id, src, count)); + } } +/**************************** Others **************************/ + XPUVersion get_xpu_version(int dev_id) { uint64_t v = 0; - int ret = xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "xpu_device_get_attr return wrong value[%d]", ret)); + PADDLE_ENFORCE_XPU_SUCCESS(xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id)); if (v == K100 || v == K200) { VLOG(1) << "KUNLUN device " << dev_id << " is XPU1\n"; diff --git a/paddle/fluid/platform/device/xpu/xpu_info.h b/paddle/fluid/platform/device/xpu/xpu_info.h index 3cb79d51eb7bb6857f6f3c6ccb6203b4231c8307..82672e61e51f4f52e6a7ea04c7b7f310404ced68 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.h +++ b/paddle/fluid/platform/device/xpu/xpu_info.h @@ -16,17 +16,35 @@ limitations under the License. */ namespace paddle { namespace platform { +/***** Version Management *****/ + +//! Get the version of XPU Driver +int GetDriverVersion(); + +//! Get the version of XPU Runtime +int GetRuntimeVersion(); + +/***** Device Management *****/ + //! Get the total number of XPU devices in system. int GetXPUDeviceCount(); +//! Set the XPU device id for next execution. +void SetXPUDeviceId(int device_id); + //! Get the current XPU device id in system. int GetXPUCurrentDeviceId(); //! Get a list of device ids from environment variable or use all. std::vector GetXPUSelectedDevices(); -//! Set the XPU device id for next execution. -void SetXPUDeviceId(int device_id); +/***** Memory Management *****/ + +//! Copy memory from address src to dst synchronously. 
+void MemcpySyncH2D(void *dst, const void *src, size_t count, int dev_id); +void MemcpySyncD2H(void *dst, const void *src, size_t count, int dev_id); +void MemcpySyncD2D(void *dst, int dst_id, const void *src, int src_id, + size_t count); class XPUDeviceGuard { public: @@ -44,8 +62,8 @@ class XPUDeviceGuard { } } - XPUDeviceGuard(const XPUDeviceGuard& o) = delete; - XPUDeviceGuard& operator=(const XPUDeviceGuard& o) = delete; + XPUDeviceGuard(const XPUDeviceGuard &o) = delete; + XPUDeviceGuard &operator=(const XPUDeviceGuard &o) = delete; private: int prev_id_{-1}; diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index a8d092a846530b7e0be825a7e02f85f253ea786b..b2f444c30c248f6fb2dcff0c1e8da568ccaa1244 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -264,19 +264,7 @@ XPUDeviceContext::XPUDeviceContext() { XPUDeviceContext::~XPUDeviceContext() {} XPUDeviceContext::XPUDeviceContext(XPUPlace place) : place_(place) { - int dev_id = -1; - int ret = xpu_current_device(&dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); - ret = xpu_set_device(place.device); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); + platform::XPUDeviceGuard guard(place.device); LOG_FIRST_N(WARNING, 1) << "Please NOTE: xpu device: " << place_.device; @@ -303,22 +291,10 @@ XPUDeviceContext::XPUDeviceContext(XPUPlace place) : place_(place) { break; } } - - ret = xpu_set_device(dev_id); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); } void XPUDeviceContext::Wait() const { - int ret = xpu_set_device(place_.device); - PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, - platform::errors::External( - "XPU API return wrong value[%d], please check whether " - "Baidu Kunlun Card is properly installed.", - ret)); + platform::SetXPUDeviceId(place_.device); xpu_wait(context_->xpu_stream); } diff --git a/paddle/fluid/platform/stream_callback_manager.cc b/paddle/fluid/platform/stream_callback_manager.cc index f6c54c2397b18f731ffa9ac44eca6a5dcaf18533..7fce0296d437a096cd9aa99a30476fc44e34c703 100644 --- a/paddle/fluid/platform/stream_callback_manager.cc +++ b/paddle/fluid/platform/stream_callback_manager.cc @@ -13,13 +13,7 @@ // limitations under the License. #include "paddle/fluid/platform/stream_callback_manager.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/device/npu/npu_info.h" -#include "paddle/fluid/platform/enforce.h" -#ifdef PADDLE_WITH_MLU -#include "paddle/fluid/platform/device/mlu/enforce.h" -#include "paddle/fluid/platform/device/mlu/mlu_info.h" -#endif +#include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { namespace platform {
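The piece that ties these changes together is PADDLE_ENFORCE_XPU_SUCCESS, which accepts both the plain int codes returned by the XPU runtime and BKCLResult_t codes from BKCL because the expected success value is looked up per return type through the ExternalApiType trait. The stand-alone sketch below mirrors that dispatch with hypothetical stand-in types and names (no Paddle or XPU headers), so the mechanism can be compiled and read in isolation.

#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the two status types handled by enforce_xpu.h.
using XpuStatus = int;  // the XPU runtime APIs return plain int
enum BkclStatus { kBkclSuccess = 0, kBkclRuntimeError = 2 };

// Per-type success value, analogous to platform::details::ExternalApiType.
template <typename T>
struct StatusTraits;
template <>
struct StatusTraits<XpuStatus> {
  static constexpr XpuStatus kSuccess = 0;
};
template <>
struct StatusTraits<BkclStatus> {
  static constexpr BkclStatus kSuccess = kBkclSuccess;
};

// Simplified analogue of PADDLE_ENFORCE_XPU_SUCCESS: evaluate the call once,
// compare against the success value deduced from its own type, throw on failure.
#define ENFORCE_SUCCESS_SKETCH(COND)                                      \
  do {                                                                    \
    auto status = (COND);                                                 \
    if (status != StatusTraits<decltype(status)>::kSuccess) {             \
      throw std::runtime_error("status check failed, code " +             \
                               std::to_string(static_cast<int>(status))); \
    }                                                                     \
  } while (0)

int main() {
  ENFORCE_SUCCESS_SKETCH(XpuStatus{0});         // int path: passes
  try {
    ENFORCE_SUCCESS_SKETCH(kBkclRuntimeError);  // enum path: throws
  } catch (const std::exception &e) {
    std::cout << e.what() << std::endl;
  }
  return 0;
}

Supporting another status type in the real header then amounts to one more DEFINE_EXTERNAL_API_TYPE specialization, which is how int and BKCLResult_t are registered in enforce_xpu.h above.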