From 5450e42c273bb3ca311df16154a07b47e4615af1 Mon Sep 17 00:00:00 2001
From: Chen Weihang <chenweihang@baidu.com>
Date: Thu, 14 Apr 2022 18:44:05 +0800
Subject: [PATCH] [CustomOp] Add context pool unittests (#41085) (#41782)

* add context pool unittests

* fix timeout

* polish details

* change option pos

* add dll decl for windows

* fix pre-commit error

* move dll_decl and export DeviceContext

* replace lost dll_decl.h
---
 paddle/phi/api/all.h                          |  2 +-
 paddle/phi/api/ext/op_meta_info.h             |  2 +-
 paddle/phi/api/include/context_pool.h         |  7 ++-
 paddle/phi/api/{ext => include}/dll_decl.h    |  0
 paddle/phi/api/include/tensor.h               |  2 +-
 paddle/phi/api/lib/api_registry.h             |  2 +-
 paddle/phi/backends/cpu/cpu_context.h         |  2 +-
 paddle/phi/backends/gpu/gpu_context.h         |  2 +-
 paddle/phi/common/place.h                     | 10 ++-
 paddle/phi/core/device_context.h              |  3 +-
 .../fluid/tests/custom_op/CMakeLists.txt      |  2 +
 .../tests/custom_op/context_pool_test_op.cc   | 54 ++++++++++++++++
 .../fluid/tests/custom_op/ps_usr_print_log    |  0
 .../tests/custom_op/test_context_pool.py      | 62 +++++++++++++++++++
 .../utils/cpp_extension/extension_utils.py    |  3 +
 15 files changed, 142 insertions(+), 11 deletions(-)
 rename paddle/phi/api/{ext => include}/dll_decl.h (100%)
 create mode 100644 python/paddle/fluid/tests/custom_op/context_pool_test_op.cc
 create mode 100644 python/paddle/fluid/tests/custom_op/ps_usr_print_log
 create mode 100644 python/paddle/fluid/tests/custom_op/test_context_pool.py
diff --git a/paddle/phi/api/all.h b/paddle/phi/api/all.h
index 4e0a4729916..ac8607597a4 100644
--- a/paddle/phi/api/all.h
+++ b/paddle/phi/api/all.h
@@ -26,6 +26,7 @@ limitations under the License. */
 
 // new phi apis
 #include "paddle/phi/api/include/api.h"
+#include "paddle/phi/api/include/context_pool.h"
 #include "paddle/phi/api/include/sparse_api.h"
 #include "paddle/phi/api/include/tensor.h"
 
@@ -38,7 +39,6 @@ limitations under the License. */
 
 // original custom op headers
 #include "paddle/phi/api/ext/dispatch.h"
-#include "paddle/phi/api/ext/dll_decl.h"
 #include "paddle/phi/api/ext/exception.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
 #include "paddle/phi/api/ext/place.h"
diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h
index 88660449b68..084b28626e7 100644
--- a/paddle/phi/api/ext/op_meta_info.h
+++ b/paddle/phi/api/ext/op_meta_info.h
@@ -20,8 +20,8 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 
-#include "paddle/phi/api/ext/dll_decl.h"
 #include "paddle/phi/api/ext/exception.h"
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/utils/any.h"
 
diff --git a/paddle/phi/api/include/context_pool.h b/paddle/phi/api/include/context_pool.h
index a2983d9c2aa..b429252beb7 100644
--- a/paddle/phi/api/include/context_pool.h
+++ b/paddle/phi/api/include/context_pool.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <mutex>
 
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/macros.h"
 #include "paddle/utils/flat_hash_map.h"
@@ -55,8 +56,12 @@ struct DefaultDeviceContextType<AllocationType::GPU> {
  * In order not to depend on the fluid's DeviceContextPool,
  * the DeviceContextPool here needs to be initialized in the fluid, and cannot
  * be initialized by itself.
+ *
+ * Note: DeviceContextPool is an experimental API and may be removed in the
+ * future. From 2.3, we recommend directly using the C++ API to combine new
+ * operators.
  */
-class DeviceContextPool {
+class PADDLE_API DeviceContextPool {
  public:
   static DeviceContextPool& Instance();
 
diff --git a/paddle/phi/api/ext/dll_decl.h b/paddle/phi/api/include/dll_decl.h
similarity index 100%
rename from paddle/phi/api/ext/dll_decl.h
rename to paddle/phi/api/include/dll_decl.h
diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h
index 3c5c1531c4a..ad3933e2b2b 100644
--- a/paddle/phi/api/include/tensor.h
+++ b/paddle/phi/api/include/tensor.h
@@ -29,8 +29,8 @@ using gpuStream_t = cudaStream_t;
 using gpuStream_t = hipStream_t;
 #endif
 
-#include "paddle/phi/api/ext/dll_decl.h"
 #include "paddle/phi/api/ext/place.h"
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/common/place.h"
diff --git a/paddle/phi/api/lib/api_registry.h b/paddle/phi/api/lib/api_registry.h
index 212a2f96452..ed1aaccb4e1 100644
--- a/paddle/phi/api/lib/api_registry.h
+++ b/paddle/phi/api/lib/api_registry.h
@@ -14,7 +14,7 @@ limitations under the License. */
 
 #pragma once
 
-#include "paddle/phi/api/ext/dll_decl.h"
+#include "paddle/phi/api/include/dll_decl.h"
 
 namespace paddle {
 namespace experimental {
diff --git a/paddle/phi/backends/cpu/cpu_context.h b/paddle/phi/backends/cpu/cpu_context.h
index aa14c2a8e38..e482fdc9e04 100644
--- a/paddle/phi/backends/cpu/cpu_context.h
+++ b/paddle/phi/backends/cpu/cpu_context.h
@@ -24,7 +24,7 @@ limitations under the License. */
 
 namespace phi {
 
-class CPUContext : public DeviceContext {
+class PADDLE_API CPUContext : public DeviceContext {
  public:
   CPUContext();
   CPUContext(CPUContext&&);
diff --git a/paddle/phi/backends/gpu/gpu_context.h b/paddle/phi/backends/gpu/gpu_context.h
index cd08da1c0f2..d268d4ae8d8 100644
--- a/paddle/phi/backends/gpu/gpu_context.h
+++ b/paddle/phi/backends/gpu/gpu_context.h
@@ -74,7 +74,7 @@ class DnnWorkspaceHandle {
   std::unique_ptr<std::mutex> mtx_;
 };
 
-class GPUContext : public DeviceContext {
+class PADDLE_API GPUContext : public DeviceContext {
  public:
   GPUContext();
   GPUContext(GPUContext&&);
diff --git a/paddle/phi/common/place.h b/paddle/phi/common/place.h
index 4c6d47597bd..390684366db 100644
--- a/paddle/phi/common/place.h
+++ b/paddle/phi/common/place.h
@@ -16,6 +16,8 @@ limitations under the License. */
 
 #include <string>
 
+#include "paddle/phi/api/include/dll_decl.h"
+
 namespace phi {
 
 enum class AllocationType : int8_t {
@@ -33,11 +35,13 @@ enum class AllocationType : int8_t {
 
 const char* AllocationTypeStr(AllocationType type);
 
-size_t GetOrRegisterGlobalDeviceTypeId(const std::string& device_type);
-std::string GetGlobalDeviceType(size_t device_type_id_);
+PADDLE_API size_t
+GetOrRegisterGlobalDeviceTypeId(const std::string& device_type);
+
+PADDLE_API std::string GetGlobalDeviceType(size_t device_type_id_);
 
 /// \brief The place is used to specify where the data is stored.
-class Place {
+class PADDLE_API Place {
  public:
   Place() : device(0), alloc_type_(AllocationType::UNDEFINED) {}
 
diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h
index 106d5ff7ddf..d7c2c777ca6 100644
--- a/paddle/phi/core/device_context.h
+++ b/paddle/phi/core/device_context.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include <memory>
 
+#include "paddle/phi/api/include/dll_decl.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/place.h"
 #include "paddle/phi/core/allocator.h"
@@ -30,7 +31,7 @@ class TensorBase;
  * All kernels must access the interfaces provided by the backend through
  * DeviceContext.
  */
-class DeviceContext {
+class PADDLE_API DeviceContext {
   using DataType = paddle::experimental::DataType;
 
  public:
diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
index a97afde3850..e9cd51d1c70 100644
--- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt
+++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt
@@ -3,11 +3,13 @@ if(WITH_GPU OR APPLE)
     py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
     py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
     py_test(test_custom_relu_model SRCS test_custom_relu_model.py)
+    py_test(test_context_pool SRCS test_context_pool.py)
 
     # Compiling shared library will cost some time, but running process is very fast.
     set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
     set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
     set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180)
+    set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180)
 endif()
 
 py_test(test_custom_raw_op_kernel_op SRCS test_custom_raw_op_kernel_op.py)
diff --git a/python/paddle/fluid/tests/custom_op/context_pool_test_op.cc b/python/paddle/fluid/tests/custom_op/context_pool_test_op.cc
new file mode 100644
index 00000000000..6b0edcc7ab1
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/context_pool_test_op.cc
@@ -0,0 +1,54 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iostream>
+
+#include "paddle/extension.h"
+#include "paddle/phi/backends/all_context.h"
+
+#define CHECK_INPUT(x) \
+  PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
+
+std::vector<paddle::Tensor> ContextPoolTest(const paddle::Tensor& x) {
+  // 1. test cpu context: look it up in the pool and check its place
+  paddle::experimental::Place cpu_place(
+      paddle::experimental::AllocationType::CPU);
+  auto* cpu_ctx =
+      paddle::experimental::DeviceContextPool::Instance()
+          .Get<paddle::experimental::AllocationType::CPU>(cpu_place);
+  PD_CHECK(cpu_ctx->GetPlace() == cpu_place);
+  // if you want to use eigen_device here, you need to include eigen headers
+  auto* cpu_eigen_device = cpu_ctx->eigen_device();
+  PD_CHECK(cpu_eigen_device != nullptr);
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  // 2. test gpu context: same checks, compiled only for CUDA / HIP builds
+  paddle::experimental::Place gpu_place(
+      paddle::experimental::AllocationType::GPU);
+  auto* gpu_ctx =
+      paddle::experimental::DeviceContextPool::Instance()
+          .Get<paddle::experimental::AllocationType::GPU>(gpu_place);
+  PD_CHECK(gpu_ctx->GetPlace() == gpu_place);
+  // if you want to use eigen_device here, you need to include eigen headers
+  auto* gpu_eigen_device = gpu_ctx->eigen_device();
+  PD_CHECK(gpu_eigen_device != nullptr);
+#endif
+
+  return {x};  // the single output is the input tensor itself
+}
+
+PD_BUILD_OP(context_pool_test)  // op name used by test_context_pool.py
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .SetKernelFn(PD_KERNEL(ContextPoolTest));  // kernel: ContextPoolTest
diff --git a/python/paddle/fluid/tests/custom_op/ps_usr_print_log b/python/paddle/fluid/tests/custom_op/ps_usr_print_log
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/paddle/fluid/tests/custom_op/test_context_pool.py b/python/paddle/fluid/tests/custom_op/test_context_pool.py
new file mode 100644
index 00000000000..d532b29688b
--- /dev/null
+++ b/python/paddle/fluid/tests/custom_op/test_context_pool.py
@@ -0,0 +1,62 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import unittest
+import numpy as np
+
+import paddle
+from paddle.utils.cpp_extension import load, get_build_directory
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
+from paddle.utils.cpp_extension.extension_utils import run_cmd
+from paddle.fluid.framework import _test_eager_guard
+
+# Because Windows doesn't use docker, the shared lib may already exist in the
+# cache dir; it will not be compiled again unless the shared lib is removed.
+file = '{}\\context_pool_jit\\context_pool_jit.pyd'.format(get_build_directory(
+))
+if os.name == 'nt' and os.path.isfile(file):
+    cmd = 'del {}'.format(file)
+    run_cmd(cmd, True)
+
+# Compile and load the custom op Just-In-Time.
+custom_ops = load(
+    name='context_pool_jit',
+    sources=['context_pool_test_op.cc'],
+    extra_include_paths=paddle_includes,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags
+    extra_cuda_cflags=extra_nvcc_args,  # test for cflags
+    verbose=True)
+
+
+class TestContextPool(unittest.TestCase):  # runs the context_pool_test op
+    def setUp(self):
+        self.devices = ['cpu']  # NOTE(review): never read in this class
+        if paddle.is_compiled_with_cuda():
+            self.devices.append('gpu')
+
+    def use_context_pool(self):
+        x = paddle.ones([2, 2], dtype='float32')
+        out = custom_ops.context_pool_test(x)  # the C++ op returns its input
+
+        self.assertTrue(np.array_equal(x.numpy(), out.numpy()))
+
+    def test_using_context_pool(self):
+        with _test_eager_guard():  # first run inside the eager guard
+            self.use_context_pool()
+        self.use_context_pool()  # second run outside the guard
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py
index f5291bf77b5..41add6e764a 100644
--- a/python/paddle/utils/cpp_extension/extension_utils.py
+++ b/python/paddle/utils/cpp_extension/extension_utils.py
@@ -543,6 +543,9 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
         runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
         kwargs['runtime_library_dirs'] = runtime_library_dirs
 
+    if compile_dir is None:
+        # Add this compile option to isolate fluid headers
+        add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_CUSTOM_KERNEL'])
     kwargs['extra_compile_args'] = extra_compile_args
 
     kwargs['language'] = 'c++'
-- 
GitLab