diff --git a/paddle/phi/api/all.h b/paddle/phi/api/all.h
index 4e0a4729916b36cdf0457c5eeb50d2a70cceb9a2..ac8607597a4368331dab7e7a73f06be0e70c84c4 100644
--- a/paddle/phi/api/all.h
+++ b/paddle/phi/api/all.h
@@ -26,6 +26,7 @@ limitations under the License. */
 
 // new phi apis
 #include "paddle/phi/api/include/api.h"
+#include "paddle/phi/api/include/context_pool.h"
 #include "paddle/phi/api/include/sparse_api.h"
 #include "paddle/phi/api/include/tensor.h"
 
@@ -38,7 +39,6 @@ limitations under the License. */
 
 // original custom op headers
 #include "paddle/phi/api/ext/dispatch.h"
-#include "paddle/phi/api/ext/dll_decl.h"
 #include "paddle/phi/api/ext/exception.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
 #include "paddle/phi/api/ext/place.h"
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index f820d225eff8ccb5330bda218a48999f1eec7ae2..a9475db800816c569eaad306803f89b6b3a8fad9 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -20,8 +20,8 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 
-#include "paddle/phi/api/ext/dll_decl.h"
 #include "paddle/phi/api/ext/exception.h"
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/utils/any.h"
 
diff --git a/paddle/phi/api/include/context_pool.h b/paddle/phi/api/include/context_pool.h
index a2983d9c2aa656e072b7cef010e220201ae3857f..b429252beb7fdeeebee693f8cb932eb9af5948f0 100644
--- a/paddle/phi/api/include/context_pool.h
+++ b/paddle/phi/api/include/context_pool.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <mutex>
 
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/macros.h"
 #include "paddle/utils/flat_hash_map.h"
@@ -55,8 +56,12 @@ struct DefaultDeviceContextType<AllocationType::GPU> {
  * In order not to depend on the fluid's DeviceContextPool,
  * the DeviceContextPool here needs to be initialized in the fluid, and cannot
  * be initialized by itself.
+ *
+ * Note: DeviceContextPool is an experimental API and may be removed in the
+ * future. From 2.3, we recommend directly using the C++ API to combine new
+ * operators.
  */
-class DeviceContextPool {
+class PADDLE_API DeviceContextPool {
  public:
   static DeviceContextPool& Instance();
 
diff --git a/paddle/phi/api/ext/dll_decl.h b/paddle/phi/api/include/dll_decl.h
similarity index 100%
rename from paddle/phi/api/ext/dll_decl.h
rename to paddle/phi/api/include/dll_decl.h
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index 3c5c1531c4a2dd6d977699bb65a5c9204c4bdf8f..ad3933e2b2b53ffd08b0d4560dd321783b6b83b9 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -29,8 +29,8 @@ using gpuStream_t = cudaStream_t;
 using gpuStream_t = hipStream_t;
 #endif
 
-#include "paddle/phi/api/ext/dll_decl.h"
 #include "paddle/phi/api/ext/place.h"
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/common/place.h"
diff --git a/paddle/phi/api/lib/api_registry.h b/paddle/phi/api/lib/api_registry.h
index 212a2f96452f69496d9ca60fdc3c8cdb643b9679..ed1aaccb4e115fbdbc615b5e04279297208754f4 100644
--- a/paddle/phi/api/lib/api_registry.h
+++ b/paddle/phi/api/lib/api_registry.h
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #pragma once
 
-#include "paddle/phi/api/ext/dll_decl.h"
+#include "paddle/phi/api/include/dll_decl.h"
 
 namespace paddle {
 namespace experimental {
diff --git a/paddle/phi/backends/cpu/cpu_context.h b/paddle/phi/backends/cpu/cpu_context.h
index aa14c2a8e3862139b3149bbcdcfa169d7c292377..e482fdc9e042f034988585037eb6d1096ee569da 100644
--- a/paddle/phi/backends/cpu/cpu_context.h
+++ b/paddle/phi/backends/cpu/cpu_context.h
@@ -24,7 +24,7 @@ limitations under the License. */
 
 namespace phi {
 
-class CPUContext : public DeviceContext {
+class PADDLE_API CPUContext : public DeviceContext {
  public:
   CPUContext();
   CPUContext(CPUContext&&);
diff --git a/paddle/phi/backends/gpu/gpu_context.h b/paddle/phi/backends/gpu/gpu_context.h
index ffae1f1f1353e4c92ccdfd0419a7185b12286784..8d44acaa4a0835d74d05f960c28a1be871b87f55 100644
--- a/paddle/phi/backends/gpu/gpu_context.h
+++ b/paddle/phi/backends/gpu/gpu_context.h
@@ -74,7 +74,7 @@ class DnnWorkspaceHandle {
   std::unique_ptr<std::mutex> mtx_;
 };
 
-class GPUContext : public DeviceContext {
+class PADDLE_API GPUContext : public DeviceContext {
  public:
   GPUContext();
   GPUContext(GPUContext&&);
diff --git a/paddle/phi/common/place.h b/paddle/phi/common/place.h
index 4c6d47597bd2cc12c397794eb575a74960542a6d..390684366db716cef0aac3b0f249586b8bedb7cb 100644
--- a/paddle/phi/common/place.h
+++ b/paddle/phi/common/place.h
@@ -16,6 +16,8 @@ limitations under the License. */
 
 #include <string>
 
+#include "paddle/phi/api/include/dll_decl.h"
+
 namespace phi {
 
 enum class AllocationType : int8_t {
@@ -33,11 +35,13 @@ enum class AllocationType : int8_t {
 
 const char* AllocationTypeStr(AllocationType type);
 
-size_t GetOrRegisterGlobalDeviceTypeId(const std::string& device_type);
-std::string GetGlobalDeviceType(size_t device_type_id_);
+PADDLE_API size_t
+GetOrRegisterGlobalDeviceTypeId(const std::string& device_type);
+
+PADDLE_API std::string GetGlobalDeviceType(size_t device_type_id_);
 
 /// \brief The place is used to specify where the data is stored.
-class Place {
+class PADDLE_API Place {
  public:
   Place() : device(0), alloc_type_(AllocationType::UNDEFINED) {}
 
diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index 106d5ff7ddf9855f1787428f7e71e8fcb09ee49e..d7c2c777ca6328d072316de99ab27a5f1c13fa14 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <memory>
 
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/allocator.h"
@@ -30,7 +31,7 @@ class TensorBase;
  * All kernels must access the interfaces provided by the backend through
  * DeviceContext.
  */
-class DeviceContext {
+class PADDLE_API DeviceContext {
   using DataType = paddle::experimental::DataType;
 
  public:
diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
index c20630f1a093e333f60db0bcfef66da8efbb3ef6..c76b3da7428e3c1ac6de7f09d25135796b43f58a 100644
--- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt
+++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
@@ -3,11 +3,13 @@ if(WITH_GPU OR APPLE)
     py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
     py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
     py_test(test_custom_relu_model SRCS test_custom_relu_model.py)
+    py_test(test_context_pool SRCS test_context_pool.py)
 
     # Compiling shared library will cost some time, but running process is very fast.
     set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
     set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
     set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180)
+    set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180)
 endif()
 
 py_test(test_custom_raw_op_kernel_op SRCS test_custom_raw_op_kernel_op.py)
diff --git a/python/paddle/fluid/tests/custom_op/context_pool_test_op.cc b/python/paddle/fluid/tests/custom_op/context_pool_test_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6b0edcc7ab1489599552a251e387da573a8e844a
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/context_pool_test_op.cc
@@ -0,0 +1,54 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iostream>
+
+#include "paddle/extension.h"
+#include "paddle/phi/backends/all_context.h"
+
+// NOTE(review): CHECK_INPUT is defined but never used in this file.
+#define CHECK_INPUT(x) \
+  PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+
+// Checks the CPU (and, in CUDA/HIP builds, GPU) pooled context; returns {x}.
+std::vector<paddle::Tensor> ContextPoolTest(const paddle::Tensor& x) {
+  namespace pe = paddle::experimental;
+  auto& pool = pe::DeviceContextPool::Instance();
+  pe::Place cpu_place(pe::AllocationType::CPU);
+  auto* cpu_ctx = pool.Get<pe::AllocationType::CPU>(cpu_place);
+  PD_CHECK(cpu_ctx->GetPlace() == cpu_place);
+  // Actually using the Eigen device here would need the Eigen headers.
+  auto* cpu_eigen_device = cpu_ctx->eigen_device();
+  PD_CHECK(cpu_eigen_device != nullptr);
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  // GPU variant of the checks above; compiled only when PADDLE_WITH_CUDA or
+  // PADDLE_WITH_HIP is defined.
+  pe::Place gpu_place(pe::AllocationType::GPU);
+  auto* gpu_ctx = pool.Get<pe::AllocationType::GPU>(gpu_place);
+  // The pooled context is looked up by place, so it must report the very
+  // place it was requested for.
+  PD_CHECK(gpu_ctx->GetPlace() == gpu_place);
+  // Actually using the Eigen device here would need the Eigen headers.
+  auto* gpu_eigen_device = gpu_ctx->eigen_device();
+  PD_CHECK(gpu_eigen_device != nullptr);
+#endif
+
+  return {x};
+}
+
+PD_BUILD_OP(context_pool_test)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(ContextPoolTest));
diff --git a/python/paddle/fluid/tests/custom_op/ps_usr_print_log b/python/paddle/fluid/tests/custom_op/ps_usr_print_log
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/python/paddle/fluid/tests/custom_op/test_context_pool.py b/python/paddle/fluid/tests/custom_op/test_context_pool.py
new file mode 100644
index 0000000000000000000000000000000000000000..d532b29688b39712f6601bf0d14447a952b807d7
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/test_context_pool.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import unittest
+import numpy as np
+
+import paddle
+from paddle.utils.cpp_extension import load, get_build_directory
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
+from paddle.utils.cpp_extension.extension_utils import run_cmd
+from paddle.fluid.framework import _test_eager_guard
+
+# Windows does not use docker, so the shared lib may already exist in the
+# cache dir; it will not be compiled again unless the shared lib is removed.
+file = '{}\\context_pool_jit\\context_pool_jit.pyd'.format(get_build_directory(
+))
+if os.name == 'nt' and os.path.isfile(file):
+    cmd = 'del {}'.format(file)
+    run_cmd(cmd, True)
+
+# Compile context_pool_test_op.cc Just-In-Time and load it as `custom_ops`.
+custom_ops = load(
+    name='context_pool_jit',
+    sources=['context_pool_test_op.cc'],
+    extra_include_paths=paddle_includes,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags
+    extra_cuda_cflags=extra_nvcc_args,  # test for cflags
+    verbose=True)
+
+
+class TestContextPool(unittest.TestCase):
+    def setUp(self):
+        self.devices = ['cpu']
+        if paddle.is_compiled_with_cuda():
+            self.devices.append('gpu')
+
+    def use_context_pool(self):
+        x = paddle.ones([2, 2], dtype='float32')
+        out = custom_ops.context_pool_test(x)
+
+        self.assertTrue(np.array_equal(x.numpy(), out.numpy()))
+
+    def test_using_context_pool(self):
+        with _test_eager_guard():
+            self.use_context_pool()
+        self.use_context_pool()
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py
index f5291bf77b56bb290ed32cbbce98c3a99ebaac42..41add6e764a8c79aecb93a18e7c097335c84fba0 100644
--- a/python/paddle/utils/cpp_extension/extension_utils.py
+++ b/python/paddle/utils/cpp_extension/extension_utils.py
@@ -543,6 +543,9 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
         runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
         kwargs['runtime_library_dirs'] = runtime_library_dirs
 
+    if compile_dir is None:
+        # Add this compile option to isolate fluid headers
+        add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_CUSTOM_KERNEL'])
     kwargs['extra_compile_args'] = extra_compile_args
 
     kwargs['language'] = 'c++'