未验证 提交 59ec9599 编写于 作者: C Chen Weihang 提交者: GitHub

[CustomOp] Add context pool unittests (#41085)

* add context pool unittests

* fix timeout

* polish details

* change option pos

* add dll decl for windows

* fix pre-commit error

* move dll_decl and export DeviceContext

* replace lost dll_decl.h
上级 fdeec8c3
......@@ -26,6 +26,7 @@ limitations under the License. */
// new phi apis
#include "paddle/phi/api/include/api.h"
#include "paddle/phi/api/include/context_pool.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/phi/api/include/tensor.h"
......@@ -38,7 +39,6 @@ limitations under the License. */
// original custom op headers
#include "paddle/phi/api/ext/dispatch.h"
#include "paddle/phi/api/ext/dll_decl.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/api/ext/place.h"
......
......@@ -20,8 +20,8 @@ limitations under the License. */
#include <utility>
#include <vector>
#include "paddle/phi/api/ext/dll_decl.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/phi/api/include/dll_decl.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/utils/any.h"
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <mutex>
#include "paddle/phi/api/include/dll_decl.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/macros.h"
#include "paddle/utils/flat_hash_map.h"
......@@ -55,8 +56,12 @@ struct DefaultDeviceContextType<AllocationType::GPU> {
* In order not to depend on the fluid's DeviceContextPool,
* the DeviceContextPool here needs to be initialized in the fluid, and cannot
* be initialized by itself.
*
* Note: DeviceContextPool is an experimental API and may be removed in the
* future. From 2.3, we recommend directly using the C++ API to combine new
operators.
*/
class DeviceContextPool {
class PADDLE_API DeviceContextPool {
public:
static DeviceContextPool& Instance();
......
......@@ -29,8 +29,8 @@ using gpuStream_t = cudaStream_t;
using gpuStream_t = hipStream_t;
#endif
#include "paddle/phi/api/ext/dll_decl.h"
#include "paddle/phi/api/ext/place.h"
#include "paddle/phi/api/include/dll_decl.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/layout.h"
#include "paddle/phi/common/place.h"
......
......@@ -14,7 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/phi/api/ext/dll_decl.h"
#include "paddle/phi/api/include/dll_decl.h"
namespace paddle {
namespace experimental {
......
......@@ -24,7 +24,7 @@ limitations under the License. */
namespace phi {
class CPUContext : public DeviceContext {
class PADDLE_API CPUContext : public DeviceContext {
public:
CPUContext();
CPUContext(CPUContext&&);
......
......@@ -74,7 +74,7 @@ class DnnWorkspaceHandle {
std::unique_ptr<std::mutex> mtx_;
};
class GPUContext : public DeviceContext {
class PADDLE_API GPUContext : public DeviceContext {
public:
GPUContext();
GPUContext(GPUContext&&);
......
......@@ -16,6 +16,8 @@ limitations under the License. */
#include <string>
#include "paddle/phi/api/include/dll_decl.h"
namespace phi {
enum class AllocationType : int8_t {
......@@ -33,11 +35,13 @@ enum class AllocationType : int8_t {
const char* AllocationTypeStr(AllocationType type);
size_t GetOrRegisterGlobalDeviceTypeId(const std::string& device_type);
std::string GetGlobalDeviceType(size_t device_type_id_);
PADDLE_API size_t
GetOrRegisterGlobalDeviceTypeId(const std::string& device_type);
PADDLE_API std::string GetGlobalDeviceType(size_t device_type_id_);
/// \brief The place is used to specify where the data is stored.
class Place {
class PADDLE_API Place {
public:
Place() : device(0), alloc_type_(AllocationType::UNDEFINED) {}
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory>
#include "paddle/phi/api/include/dll_decl.h"
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/allocator.h"
......@@ -30,7 +31,7 @@ class TensorBase;
* All kernels must access the interfaces provided by the backend through
* DeviceContext.
*/
class DeviceContext {
class PADDLE_API DeviceContext {
using DataType = paddle::experimental::DataType;
public:
......
......@@ -3,11 +3,13 @@ if(WITH_GPU OR APPLE)
py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
py_test(test_custom_relu_model SRCS test_custom_relu_model.py)
py_test(test_context_pool SRCS test_context_pool.py)
# Compiling shared library will cost some time, but running process is very fast.
set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180)
set_tests_properties(test_context_pool PROPERTIES TIMEOUT 180)
endif()
py_test(test_custom_raw_op_kernel_op SRCS test_custom_raw_op_kernel_op.py)
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include "paddle/extension.h"
#include "paddle/phi/backends/all_context.h"
#define CHECK_INPUT(x) \
PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
// Kernel used only for testing: verifies that the experimental
// DeviceContextPool can hand out valid CPU (and, when built with
// CUDA/HIP, GPU) device contexts from inside a custom operator,
// then returns the input tensor unchanged.
std::vector<paddle::Tensor> ContextPoolTest(const paddle::Tensor& x) {
  namespace exp = paddle::experimental;

  // 1. test cpu context
  exp::Place cpu_place(exp::AllocationType::CPU);
  auto* cpu_dev_ctx =
      exp::DeviceContextPool::Instance().Get<exp::AllocationType::CPU>(
          cpu_place);
  PD_CHECK(cpu_dev_ctx->GetPlace() == cpu_place);
  // if want to use the eigen_device here, need to include eigen headers
  PD_CHECK(cpu_dev_ctx->eigen_device() != nullptr);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  // 2. test gpu context
  exp::Place gpu_place(exp::AllocationType::GPU);
  auto* gpu_dev_ctx =
      exp::DeviceContextPool::Instance().Get<exp::AllocationType::GPU>(
          gpu_place);
  PD_CHECK(gpu_dev_ctx->GetPlace() == gpu_place);
  // if want to use the eigen_device here, need to include eigen headers
  PD_CHECK(gpu_dev_ctx->eigen_device() != nullptr);
#endif

  return {x};
}
// Register the custom operator `context_pool_test`: it takes a single
// input "X", produces a single output "Out", and dispatches to
// ContextPoolTest as its kernel (the op simply echoes X after probing
// the device context pool).
PD_BUILD_OP(context_pool_test)
.Inputs({"X"})
.Outputs({"Out"})
.SetKernelFn(PD_KERNEL(ContextPoolTest));
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
from paddle.utils.cpp_extension.extension_utils import run_cmd
from paddle.fluid.framework import _test_eager_guard
# Because Windows doesn't use docker, the shared lib may already exist in the
# cache dir; it will not be compiled again unless the shared lib is removed.
file = '{}\\context_pool_jit\\context_pool_jit.pyd'.format(get_build_directory(
))
if os.name == 'nt' and os.path.isfile(file):
    # Remove the stale .pyd via the shell so the op is rebuilt from source.
    # NOTE(review): assumes the file is not locked by another process --
    # confirm on Windows CI.
    cmd = 'del {}'.format(file)
    run_cmd(cmd, True)

# Compile and load the custom op Just-In-Time; the registered op becomes
# callable as `custom_ops.context_pool_test`.
custom_ops = load(
    name='context_pool_jit',
    sources=['context_pool_test_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,  # test for cflags
    extra_cuda_cflags=extra_nvcc_args,  # test for cflags
    verbose=True)
class TestContextPool(unittest.TestCase):
    """Check that the C++ DeviceContextPool is usable from a custom op."""

    def setUp(self):
        # Always cover CPU; add GPU when this Paddle build has CUDA support.
        if paddle.is_compiled_with_cuda():
            self.devices = ['cpu', 'gpu']
        else:
            self.devices = ['cpu']

    def use_context_pool(self):
        # The custom kernel validates the device contexts internally and
        # echoes X back, so output == input means every internal check passed.
        x = paddle.ones([2, 2], dtype='float32')
        out = custom_ops.context_pool_test(x)
        self.assertTrue(np.array_equal(x.numpy(), out.numpy()))

    def test_using_context_pool(self):
        # Exercise the op under the eager guard and then without it.
        with _test_eager_guard():
            self.use_context_pool()
        self.use_context_pool()
# Run the test suite when this file is executed directly (e.g. by the
# CMake py_test wrapper).
if __name__ == '__main__':
    unittest.main()
......@@ -543,6 +543,9 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
kwargs['runtime_library_dirs'] = runtime_library_dirs
if compile_dir is None:
# Add this compile option to isolate fluid headers
add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_CUSTOM_KERNEL'])
kwargs['extra_compile_args'] = extra_compile_args
kwargs['language'] = 'c++'
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册