diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc
index 4b6759ea165edf29add66ee44461fdd4d9f84d00..db0f5758d2f53263d0f4166a55fb4e64c8f0e2c5 100644
--- a/paddle/fluid/operators/reader/buffered_reader.cc
+++ b/paddle/fluid/operators/reader/buffered_reader.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,6 +14,7 @@
 #include "paddle/fluid/operators/reader/buffered_reader.h"
 
 #include "paddle/fluid/framework/convert_utils.h"
+#include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/profiler.h"
 #include "paddle/fluid/platform/profiler/event_tracing.h"
 
@@ -85,10 +86,27 @@ BufferedReader::BufferedReader(
     stream_ = platform::MluStreamResourcePool::Instance().New(dev_idx);
   }
 #endif
+
+#ifdef PADDLE_WITH_XPU
+  if (platform::is_xpu_place(place_)) {
+    int dev_idx = place_.device;
+    compute_stream_ =
+        ((platform::XPUDeviceContext *)(platform::DeviceContextPool::Instance()
+                                            .Get(place_)))
+            ->stream();
+    events_.resize(buffer_size);
+    for (auto &event : events_) {
+      event = platform::XpuEventResourcePool::Instance().New(dev_idx);
+    }
+    stream_ = platform::XpuStreamResourcePool::Instance().New(dev_idx);
+  }
+#endif
+
   cpu_buffer_.resize(buffer_size);
   cuda_buffer_.resize(buffer_size);
   npu_buffer_.resize(buffer_size);
   mlu_buffer_.resize(buffer_size);
+  xpu_buffer_.resize(buffer_size);
   ReadTillBufferFullAsync();
 }
 
@@ -322,6 +340,57 @@ void BufferedReader::ReadAsync(size_t i) {
       platform::MLUStreamSync(stream_.get());
     }
 #endif
+
+#ifdef PADDLE_WITH_XPU
+    if (platform::is_xpu_place(place_)) {
+      TensorVec &xpu = xpu_buffer_[i];
+      if (xpu.empty()) {
+        xpu.resize(cpu.size());
+      } else {
+        PADDLE_ENFORCE_EQ(
+            xpu.size(), cpu.size(),
+            platform::errors::InvalidArgument(
+                "Input tensor number on XPU and CPU devices are not matched. "
" + "The number on XPU is %d, on CPU is %d", + xpu.size(), cpu.size())); + } + + std::vector xpu_ptrs; + xpu_ptrs.reserve(cpu.size()); + for (size_t i = 0; i < cpu.size(); ++i) { + xpu[i].Resize(cpu[i].dims()); + xpu[i].set_layout(cpu[i].layout()); + xpu_ptrs.emplace_back(xpu[i].mutable_data(place_, cpu[i].type())); + } + + platform::XPUDeviceGuard gurad(place_.device); + int r = xpu_event_record(events_[i].get(), compute_stream_); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_event_record"); + r = xpu_stream_wait_event(stream_.get(), events_[i].get()); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_stream_wait_event"); + + platform::RecordEvent record_event("BufferedReader:MemoryCopy", + platform::TracerEventType::UserDefined, + 1); + for (size_t i = 0; i < cpu.size(); ++i) { + auto cpu_place = cpu[i].place(); + auto cpu_ptr = cpu[i].data(); + auto xpu_ptr = xpu_ptrs[i]; + auto size = + cpu[i].numel() * paddle::framework::DataTypeSize(cpu[i].dtype()); + // TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe + // KL3 + if ((platform::is_xpu_place(cpu_place))) { + memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size); + platform::XPUStreamSync(stream_.get()); + } else { + memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size); + } + xpu[i].set_lod(cpu[i].lod()); + } + platform::XPUStreamSync(stream_.get()); + } +#endif return i; })); } @@ -359,6 +428,8 @@ void BufferedReader::ReadNextImpl(std::vector *out) { *out = std::move(npu_buffer_[i]); } else if (platform::is_mlu_place(place_)) { *out = std::move(mlu_buffer_[i]); + } else if (platform::is_xpu_place(place_)) { + *out = std::move(xpu_buffer_[i]); } else { *out = std::move(cpu_buffer_[i]); } diff --git a/paddle/fluid/operators/reader/buffered_reader.h b/paddle/fluid/operators/reader/buffered_reader.h index f0f3b6b7f9fdfeb69c46e7122fae5c6cfbf3a169..52d3d8d6999a031fef4a2e6218a74e834d5c400a 100644 --- a/paddle/fluid/operators/reader/buffered_reader.h +++ b/paddle/fluid/operators/reader/buffered_reader.h @@ -1,4 +1,4 @@ -// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -33,6 +33,10 @@
 #include "paddle/fluid/platform/device/mlu/mlu_info.h"
 #include "paddle/fluid/platform/device/mlu/mlu_resource_pool.h"
 #endif
+#ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/platform/device/xpu/xpu_info.h"
+#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
+#endif
 
 namespace paddle {
 namespace operators {
@@ -76,6 +80,7 @@ class BufferedReader : public framework::DecoratedReader {
   std::vector<TensorVec> cuda_buffer_;
   std::vector<TensorVec> npu_buffer_;
   std::vector<TensorVec> mlu_buffer_;
+  std::vector<TensorVec> xpu_buffer_;
   size_t prev_pos_{-1UL};
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   gpuStream_t compute_stream_;
@@ -94,6 +99,12 @@
   std::shared_ptr<platform::MluStreamObject> stream_;
   std::vector<std::shared_ptr<platform::MluEventObject>> events_;
 #endif
+
+#ifdef PADDLE_WITH_XPU
+  xpuStream compute_stream_;
+  std::shared_ptr<platform::XpuStreamObject> stream_;
+  std::vector<std::shared_ptr<platform::XpuEventObject>> events_;
+#endif
 };
 
 }  // namespace reader
diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt
index f29546c5210d9c6c54a16df015c21add27ccd95c..356b5ab2cd23ce511b17c3a267cceed423d7a347 100644
--- a/paddle/fluid/platform/CMakeLists.txt
+++ b/paddle/fluid/platform/CMakeLists.txt
@@ -125,7 +125,7 @@ cc_library(device_context SRCS device_context.cc DEPS simple_threadpool malloc x
     place phi_place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
     ${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS} ${MLU_CTX_DEPS} eigen3 cpu_context generator)
 if(WITH_XPU)
-  target_link_libraries(device_context xpu_context)
+  target_link_libraries(device_context xpu_context xpu_resource_pool)
 endif()
 
 cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce)
diff --git a/paddle/fluid/platform/device/xpu/CMakeLists.txt b/paddle/fluid/platform/device/xpu/CMakeLists.txt
index b6a26f2554a131aab6e87146c241dc973d9c8f56..3399fff087f8d67717ef35f6d1b76bb7ab201e9b 100644
--- a/paddle/fluid/platform/device/xpu/CMakeLists.txt
+++ b/paddle/fluid/platform/device/xpu/CMakeLists.txt
@@ -7,5 +7,6 @@ set(XPU_CTX_DEPS xpulib ssl crypto rt z resolv dl)
 
 cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib device_context place phi_xpu_info)
 cc_library(xpu_op_list SRCS xpu_op_list.cc DEPS gflags glog enforce xpulib device_context op_kernel_type)
+cc_library(xpu_resource_pool SRCS xpu_resource_pool.cc DEPS xpu_info)
 
 add_subdirectory(tests)
diff --git a/paddle/fluid/platform/device/xpu/xpu_info.cc b/paddle/fluid/platform/device/xpu/xpu_info.cc
index 2e960c1c0dd9cbb6ecabfdcf98872f73c9c9fd61..cdd7ee7f806e932b4d1041575fb5432c81c42865 100644
--- a/paddle/fluid/platform/device/xpu/xpu_info.cc
+++ b/paddle/fluid/platform/device/xpu/xpu_info.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -79,6 +79,10 @@ void MemcpySyncD2D(void* dst, const platform::XPUPlace& dst_place,
                     *dev_ctx);
 }
 
+void XPUStreamSync(xpuStream stream) {
+  PADDLE_ENFORCE_XDNN_SUCCESS(xpu_wait(stream), "xpu_wait");
+}
+
 /**************************** Others **************************/
 
 phi::backends::xpu::XPUVersion get_xpu_version(int dev_id) {
diff --git a/paddle/fluid/platform/device/xpu/xpu_info.h b/paddle/fluid/platform/device/xpu/xpu_info.h
index 33385f8e45937880398ffc3d280974a62ced201f..38b4defadc6c3064eb47af2a48509e1e3339d6e4 100644
--- a/paddle/fluid/platform/device/xpu/xpu_info.h
+++ b/paddle/fluid/platform/device/xpu/xpu_info.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -14,8 +14,13 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/platform/place.h"
 #include "paddle/phi/backends/xpu/xpu_info.h"
+#include "xpu/runtime.h"
 
 namespace paddle {
+
+using xpuStream = XPUStream;
+using xpuEventHandle = XPUEvent;
+
 namespace platform {
 
 /***** Version Management *****/
@@ -51,6 +56,9 @@ void MemcpySyncD2D(void *dst, const platform::XPUPlace &dst_place,
                    const void *src, const platform::XPUPlace &src_place,
                    size_t count);
 
+//! Blocks until stream has completed all operations.
+void XPUStreamSync(xpuStream stream);
+
 using XPUDeviceGuard = phi::backends::xpu::XPUDeviceGuard;
 
 phi::backends::xpu::XPUVersion get_xpu_version(int dev_id);
diff --git a/paddle/fluid/platform/device/xpu/xpu_resource_pool.cc b/paddle/fluid/platform/device/xpu/xpu_resource_pool.cc
new file mode 100644
index 0000000000000000000000000000000000000000..af0d47c71671777bd4af497bc40362952b93d921
--- /dev/null
+++ b/paddle/fluid/platform/device/xpu/xpu_resource_pool.cc
@@ -0,0 +1,98 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#if defined(PADDLE_WITH_XPU)
+#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"
+
+namespace paddle {
+namespace platform {
+
+XpuStreamResourcePool::XpuStreamResourcePool() {
+  int dev_cnt = platform::GetXPUDeviceCount();
+  pool_.reserve(dev_cnt);
+  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
+    auto creator = [dev_idx] {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpuStream stream;
+      xpu_stream_create(&stream);
+      return stream;
+    };
+
+    auto deleter = [dev_idx](xpuStream stream) {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpu_stream_destroy(stream);
+    };
+
+    pool_.emplace_back(ResourcePool<XpuStreamObject>::Create(creator, deleter));
+  }
+}
+
+XpuStreamResourcePool& XpuStreamResourcePool::Instance() {
+  static XpuStreamResourcePool pool;
+  return pool;
+}
+
+std::shared_ptr<XpuStreamObject> XpuStreamResourcePool::New(int dev_idx) {
+  PADDLE_ENFORCE_GE(
+      dev_idx, 0,
+      platform::errors::InvalidArgument(
+          "The dev_idx should be not less than 0, but got %d.", dev_idx));
+  PADDLE_ENFORCE_LT(
+      dev_idx, pool_.size(),
+      platform::errors::OutOfRange(
+          "The dev_idx should be less than device count %d, but got %d.",
+          pool_.size(), dev_idx));
+  return pool_[dev_idx]->New();
+}
+
+XpuEventResourcePool::XpuEventResourcePool() {
+  int dev_cnt = platform::GetXPUDeviceCount();
+  pool_.reserve(dev_cnt);
+  for (int dev_idx = 0; dev_idx < dev_cnt; ++dev_idx) {
+    auto creator = [dev_idx] {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpuEventHandle event;
+      xpu_event_create(&event);
+      return event;
+    };
+
+    auto deleter = [dev_idx](xpuEventHandle event) {
+      platform::XPUDeviceGuard guard(dev_idx);
+      xpu_event_destroy(event);
+    };
+
+    pool_.emplace_back(ResourcePool<XpuEventObject>::Create(creator, deleter));
+  }
+}
+
+XpuEventResourcePool& XpuEventResourcePool::Instance() {
+  static XpuEventResourcePool pool;
+  return pool;
+}
+
+std::shared_ptr<XpuEventObject> XpuEventResourcePool::New(int dev_idx) {
+  PADDLE_ENFORCE_GE(
+      dev_idx, 0,
+      platform::errors::InvalidArgument(
+          "The dev_idx should be not less than 0, but got %d.", dev_idx));
+  PADDLE_ENFORCE_LT(
+      dev_idx, pool_.size(),
+      platform::errors::OutOfRange(
+          "The dev_idx should be less than device count %d, but got %d.",
+          pool_.size(), dev_idx));
+  return pool_[dev_idx]->New();
+}
+
+}  // namespace platform
+}  // namespace paddle
+#endif
diff --git a/paddle/fluid/platform/device/xpu/xpu_resource_pool.h b/paddle/fluid/platform/device/xpu/xpu_resource_pool.h
new file mode 100644
index 0000000000000000000000000000000000000000..5c6ade8f6f88f83c08e396fa8e75d3255c5e27a8
--- /dev/null
+++ b/paddle/fluid/platform/device/xpu/xpu_resource_pool.h
@@ -0,0 +1,64 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#if defined(PADDLE_WITH_XPU)
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "paddle/fluid/platform/device/xpu/xpu_info.h"
+#include "paddle/fluid/platform/resource_pool.h"
+
+namespace paddle {
+namespace platform {
+
+using XpuStreamObject = std::remove_pointer<xpuStream>::type;
+using XpuEventObject = std::remove_pointer<xpuEventHandle>::type;
+
+class XpuStreamResourcePool {
+ public:
+  std::shared_ptr<XpuStreamObject> New(int dev_idx);
+
+  static XpuStreamResourcePool &Instance();
+
+ private:
+  XpuStreamResourcePool();
+
+  DISABLE_COPY_AND_ASSIGN(XpuStreamResourcePool);
+
+ private:
+  std::vector<std::shared_ptr<ResourcePool<XpuStreamObject>>> pool_;
+};
+
+class XpuEventResourcePool {
+ public:
+  std::shared_ptr<XpuEventObject> New(int dev_idx);
+
+  static XpuEventResourcePool &Instance();
+
+ private:
+  XpuEventResourcePool();
+
+  DISABLE_COPY_AND_ASSIGN(XpuEventResourcePool);
+
+ private:
+  std::vector<std::shared_ptr<ResourcePool<XpuEventObject>>> pool_;
+};
+
+}  // namespace platform
+}  // namespace paddle
+
+#endif
diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h
index 2c5f24d28c6d6b87172fe9e4459908c57c3638db..2b53ecf86a641f3579f6ff6f6c78f6c2c55d83da 100644
--- a/paddle/fluid/platform/device_context.h
+++ b/paddle/fluid/platform/device_context.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 Copyright (c) 2022 NVIDIA Corporation. All rights reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -188,6 +188,7 @@ class XPUDeviceContext : public phi::XPUContext {
   explicit XPUDeviceContext(XPUPlace place);
   virtual ~XPUDeviceContext();
   Eigen::DefaultDevice* eigen_device() const { return nullptr; }
+  xpuStream stream() const { return XPUContext::x_context()->xpu_stream; }
 };
 
 template <>
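Usage note (not part of the patch): the sketch below shows how the stream/event pools and the XPUStreamSync helper introduced above fit together, mirroring the ordering that BufferedReader::ReadAsync sets up between the compute stream and a dedicated copy stream. The function name DemoCopyOrdering and its parameters are hypothetical; only the pool, event, stream, and sync calls come from this patch.

// Minimal sketch, assuming PADDLE_WITH_XPU is defined and the build links
// xpu_resource_pool (see the CMake changes above).
#include "paddle/fluid/platform/device/device_wrapper.h"      // PADDLE_ENFORCE_XDNN_SUCCESS
#include "paddle/fluid/platform/device/xpu/xpu_info.h"         // xpuStream, XPUStreamSync
#include "paddle/fluid/platform/device/xpu/xpu_resource_pool.h"

// DemoCopyOrdering is a made-up name; dev_idx and compute_stream would come
// from the place and device context, as in the BufferedReader constructor.
void DemoCopyOrdering(int dev_idx, xpuStream compute_stream) {
  namespace plat = paddle::platform;

  // Acquire a copy stream and an event; both return to their pools when the
  // shared_ptr is released.
  auto copy_stream = plat::XpuStreamResourcePool::Instance().New(dev_idx);
  auto event = plat::XpuEventResourcePool::Instance().New(dev_idx);

  // Record an event on the compute stream and make the copy stream wait on
  // it, exactly as BufferedReader::ReadAsync does for XPU places.
  int r = xpu_event_record(event.get(), compute_stream);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_event_record");
  r = xpu_stream_wait_event(copy_stream.get(), event.get());
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "xpu_stream_wait_event");

  // ... enqueue host-to-device copies on copy_stream here ...

  // Block until the copy stream has drained (helper added in xpu_info.h).
  plat::XPUStreamSync(copy_stream.get());
}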