未验证 提交 13b12457 编写于 作者: F Feiyu Chan 提交者: GitHub

support complex data types for libpaddle.Tensor's element get and set (#52324)

1. add type caster for paddle's complex type, to allow pybind to automatically cast it with python's complex type;
2. add complex64 and complex128 data type for `libpaddle.Tensor`'s element get and set(which is required to perturb an element to get the numerical derivative)
3. add support for cuda pinned place in `libpaddle.Tensor` element get and set

---
4. fix a bug in op code generation (the output folder was previously created concurrently with the parsing of op yamls).
上级 40b30f50
......@@ -56,6 +56,14 @@ set(generated_static_argument_mapping_path
set(generated_sparse_argument_mapping_path
${CMAKE_SOURCE_DIR}/paddle/phi/ops/compat/generated_sparse_sig.cc)
# Create the output folder for parsed op yamls up front, in its own
# execute_process call, and fail hard if creation does not succeed.
# Per the commit description, creating this directory in the same
# execute_process as the parse_op.py commands ran the two concurrently,
# which could race; pre-creating it here fixes that.
execute_process(
  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
  # RESULTS_VARIABLE captures the exit status of the command above.
  COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir} RESULTS_VARIABLE
          _result)
if(${_result})
  message(FATAL_ERROR "Failed to create folder for parsed op yamls, exiting.")
endif()
message(
"parse op yamls:
- ${op_yaml_file}
......@@ -66,7 +74,6 @@ message(
- ${static_op_yaml_file}")
execute_process(
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/paddle/fluid/operators/generator
COMMAND ${CMAKE_COMMAND} -E make_directory ${parsed_op_dir}
COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${op_yaml_file}
--output_path ./parsed_ops/ops.parsed.yaml
COMMAND ${PYTHON_EXECUTABLE} parse_op.py --op_yaml_path ${legacy_op_yaml_file}
......
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
This file is adapted from
https://github.com/pybind/pybind11/blob/master/include/pybind11/complex.h.
The original license is kept as-is:
pybind11/complex.h: Complex number support
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>
All rights reserved. Use of this source code is governed by a
BSD-style license that can be found in the LICENSE file.
*/
#pragma once
#include <Python.h>
#include "paddle/phi/common/complex.h"
#include "pybind11/pybind11.h"
/// glibc defines I as a macro which breaks things, e.g., boost template names
#ifdef I
#undef I
#endif
PYBIND11_NAMESPACE_BEGIN(PYBIND11_NAMESPACE)
PYBIND11_NAMESPACE_BEGIN(detail)
// The specialization is added to make phi::dtype::complex<T> values
// casted as python complex values automatically when return from a function
// exported to python via pybind.
// For more details about custom type casters, see
// https://pybind11.readthedocs.io/en/stable/advanced/cast/custom.html
// Type caster that converts between phi::dtype::complex<T> and Python's
// built-in complex type: `load` handles Python -> C++, `cast` handles
// C++ -> Python. Registering this specialization lets pybind-exported
// functions accept/return phi complex values transparently.
template <typename T>
class type_caster<phi::dtype::complex<T>> {
 public:
  // Convert a Python object `src` into `value` (declared by the
  // PYBIND11_TYPE_CASTER macro below). Returns false when the object
  // cannot be interpreted as a complex number.
  bool load(handle src, bool convert) {
    // A null handle: nothing to convert.
    if (!src) return false;
    // When implicit conversion is disallowed, accept only genuine
    // Python complex objects.
    if (!convert && !PyComplex_Check(src.ptr())) return false;
    Py_complex result = PyComplex_AsCComplex(src.ptr());
    // PyComplex_AsCComplex reports failure by returning -1.0 in the real
    // part AND setting a Python error; clear the error so this caster
    // simply signals "no match" instead of leaving an exception pending.
    if (result.real == -1.0 && PyErr_Occurred()) {
      PyErr_Clear();
      return false;
    }
    // Narrow from double precision to T (e.g. float for complex64).
    value = phi::dtype::complex<T>(static_cast<T>(result.real),
                                   static_cast<T>(result.imag));
    return true;
  }

  // Convert a phi complex value into a newly created Python complex object.
  // NOTE(review): assumes phi::dtype::complex exposes public `real`/`imag`
  // data members, consistent with the constructor call in load().
  static handle cast(const phi::dtype::complex<T> &src,
                     return_value_policy /* policy */,
                     handle /* parent */) {
    return PyComplex_FromDoubles(static_cast<double>(src.real),
                                 static_cast<double>(src.imag));
  }

  // Declares the `value` member used by load() and names the type
  // "complex" in generated Python signatures/docstrings.
  PYBIND11_TYPE_CASTER(phi::dtype::complex<T>, _("complex"));
};
PYBIND11_NAMESPACE_END(detail)
PYBIND11_NAMESPACE_END(PYBIND11_NAMESPACE)
......@@ -178,6 +178,7 @@ limitations under the License. */
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/imperative/layout_autotune.h"
#include "paddle/fluid/pybind/complex.h"
#include "paddle/fluid/pybind/eager_utils.h"
#include "paddle/fluid/pybind/tensor.h"
#include "paddle/phi/api/ext/op_meta_info.h"
......@@ -493,6 +494,10 @@ void BindTensor(pybind11::module &m) { // NOLINT
.def("_get_float_element", TensorGetElement<float>)
.def("_set_double_element", TensorSetElement<double>)
.def("_get_double_element", TensorGetElement<double>)
.def("_set_complex64_element", TensorSetElement<paddle::complex64>)
.def("_get_complex64_element", TensorGetElement<paddle::complex64>)
.def("_set_complex128_element", TensorSetElement<paddle::complex128>)
.def("_get_complex128_element", TensorGetElement<paddle::complex128>)
.def("_place", [](phi::DenseTensor &self) { return self.place(); })
.def("_dtype",
[](phi::DenseTensor &self) {
......
......@@ -284,7 +284,8 @@ T TensorGetElement(const phi::DenseTensor &self, size_t offset) {
auto p = self.place();
paddle::memory::Copy(platform::CPUPlace(), &b, p, a + offset, sizeof(T));
#endif
} else if (platform::is_gpu_place(self.place())) {
} else if (platform::is_gpu_place(self.place()) ||
platform::is_cuda_pinned_place(self.place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
const T *a = self.data<T>();
auto p = self.place();
......@@ -334,7 +335,8 @@ void TensorSetElement(phi::DenseTensor *self, size_t offset, T elem) {
T *a = self->mutable_data<T>(p);
paddle::memory::Copy(p, a + offset, platform::CPUPlace(), &elem, sizeof(T));
#endif
} else if (platform::is_gpu_place(self->place())) {
} else if (platform::is_gpu_place(self->place()) ||
platform::is_cuda_pinned_place(self->place())) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
auto p = self->place();
T *a = self->mutable_data<T>(p);
......
......@@ -41,6 +41,8 @@ class TestTensor(unittest.TestCase):
'float16',
'float32',
'float64',
'complex64',
'complex128',
]
def test_int_tensor(self):
......@@ -106,6 +108,58 @@ class TestTensor(unittest.TestCase):
cuda_tensor_array_2.all(), tensor_array.all()
)
def test_complex64_tensor(self):
    """Round-trip a complex64 numpy array through a tensor on CPU (and
    GPU when CUDA is available) and verify the contents are preserved.
    """
    scope = core.Scope()
    var = scope.var("complex64_tensor")
    cpu_tensor = var.get_tensor()
    tensor_array = (
        np.random.uniform(-1, 1, (100, 200))
        + 1j * np.random.uniform(-1, 1, (100, 200))
    ).astype(np.complex64)
    place = core.CPUPlace()
    cpu_tensor.set(tensor_array, place)
    cpu_tensor_array_2 = np.array(cpu_tensor)
    # BUG FIX: the original asserted `a.all() == b.all()`, which only
    # compares the all-truthiness of each array (two booleans), not the
    # array contents. Compare element-wise instead.
    np.testing.assert_array_equal(cpu_tensor_array_2, tensor_array)

    if core.is_compiled_with_cuda():
        cuda_tensor = var.get_tensor()
        tensor_array = (
            np.random.uniform(-1, 1, (100, 200))
            + 1j * np.random.uniform(-1, 1, (100, 200))
        ).astype(np.complex64)
        place = core.CUDAPlace(0)
        cuda_tensor.set(tensor_array, place)
        cuda_tensor_array_2 = np.array(cuda_tensor)
        # Same element-wise comparison for the GPU round trip.
        np.testing.assert_array_equal(cuda_tensor_array_2, tensor_array)
def test_complex128_tensor(self):
    """Round-trip a complex128 numpy array through a tensor on CPU (and
    GPU when CUDA is available) and verify the contents are preserved.
    """
    scope = core.Scope()
    var = scope.var("complex128_tensor")
    cpu_tensor = var.get_tensor()
    tensor_array = (
        np.random.uniform(-1, 1, (100, 200))
        + 1j * np.random.uniform(-1, 1, (100, 200))
    ).astype(np.complex128)
    place = core.CPUPlace()
    cpu_tensor.set(tensor_array, place)
    cpu_tensor_array_2 = np.array(cpu_tensor)
    # BUG FIX: the original asserted `a.all() == b.all()`, which only
    # compares the all-truthiness of each array (two booleans), not the
    # array contents. Compare element-wise instead.
    np.testing.assert_array_equal(cpu_tensor_array_2, tensor_array)

    if core.is_compiled_with_cuda():
        cuda_tensor = var.get_tensor()
        tensor_array = (
            np.random.uniform(-1, 1, (100, 200))
            + 1j * np.random.uniform(-1, 1, (100, 200))
        ).astype(np.complex128)
        place = core.CUDAPlace(0)
        cuda_tensor.set(tensor_array, place)
        cuda_tensor_array_2 = np.array(cuda_tensor)
        # Same element-wise comparison for the GPU round trip.
        np.testing.assert_array_equal(cuda_tensor_array_2, tensor_array)
def test_int_lod_tensor(self):
place = core.CPUPlace()
scope = core.Scope()
......@@ -385,6 +439,71 @@ class TestTensor(unittest.TestCase):
self.assertIsNotNone(exception)
def test_tensor_set_item_complex128(self):
    """Check `_set_complex128_element` / `_get_complex128_element` on
    every place available in this build (CPU always; CUDA device and
    CUDA pinned memory when compiled with CUDA).
    """
    array = (
        np.random.random((100, 100)) + 1j * np.random.random((100, 100))
    ).astype(np.complex128)
    tensor = fluid.Tensor()

    # Same place order as before: CPU, then (if CUDA) device and pinned.
    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        places.append(core.CUDAPlace(0))
        places.append(core.CUDAPinnedPlace())

    for place in places:
        tensor.set(array, place)
        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.COMPLEX128)
        # Perturb one element and read it back through the element API.
        tensor._set_complex128_element(0, 42.1 + 42.1j)
        np.testing.assert_allclose(
            tensor._get_complex128_element(0), 42.1 + 42.1j
        )
def test_tensor_set_item_complex64(self):
    """Check `_set_complex64_element` / `_get_complex64_element` on
    every place available in this build (CPU always; CUDA device and
    CUDA pinned memory when compiled with CUDA).
    """
    array = (
        np.random.random((100, 100)) + 1j * np.random.random((100, 100))
    ).astype(np.complex64)
    tensor = fluid.Tensor()

    # Same place order as before: CPU, then (if CUDA) device and pinned.
    places = [core.CPUPlace()]
    if core.is_compiled_with_cuda():
        places.append(core.CUDAPlace(0))
        places.append(core.CUDAPinnedPlace())

    for place in places:
        tensor.set(array, place)
        self.assertEqual(tensor._dtype(), core.VarDesc.VarType.COMPLEX64)
        # Perturb one element and read it back; compare in single
        # precision since the element API returns a Python complex.
        tensor._set_complex64_element(0, 42.1 + 42.1j)
        np.testing.assert_allclose(
            np.complex64(tensor._get_complex64_element(0)),
            np.complex64(42.1 + 42.1j),
        )
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册