Unverified commit bfacd706, authored by: W wawltor, committed by: GitHub

add the uva function for the Tensor (#38950)

* add the uva api for the tensor

* fix the compiler problem for the uva

* fix the example for the _uva

* fix the compile problem in the pten library

* update the environment support for the uva

* use make_shared instead of constructing the shared_ptr directly
Parent df898f8b
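The binding below builds on the CUDA runtime's host-registration API: cudaHostRegister pins an existing CPU allocation, and cudaHostGetDevicePointer returns a device-side alias for it, so GPU kernels can read the memory in place with no copy. As a minimal standalone sketch of that pattern (plain CUDA runtime, independent of Paddle; the buffer and its size are illustrative):

// Minimal sketch of the UVA host-registration pattern used by _uva below.
// Plain CUDA runtime API; buffer contents and size are illustrative only.
#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> host(1024, 1.0f);  // existing pageable CPU memory
  size_t bytes = host.size() * sizeof(float);

  // Pin the already-allocated CPU memory so the GPU can address it.
  cudaError_t err =
      cudaHostRegister(host.data(), bytes, cudaHostRegisterDefault);
  if (err != cudaSuccess) {
    std::printf("cudaHostRegister failed: %d\n", static_cast<int>(err));
    return 1;
  }

  // Obtain the device-side alias of the pinned host memory; kernels can
  // dereference this pointer directly (zero-copy access over the bus).
  void *dev_ptr = nullptr;
  cudaHostGetDevicePointer(&dev_ptr, host.data(), /*flags=*/0);

  // ... launch kernels that read or write through dev_ptr ...

  cudaHostUnregister(host.data());  // undo the registration when done
  return 0;
}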
@@ -1870,6 +1870,61 @@ void BindImperative(py::module *m_ptr) {
#endif
           },
           py::return_value_policy::reference)
#if defined(PADDLE_WITH_CUDA)
      .def("_uva",
           [](const std::shared_ptr<imperative::VarBase> &self, int device_id) {
             PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->Place()), true,
                               platform::errors::InvalidArgument(
                                   "Unified virtual addressing only supports "
                                   "CPU Tensor currently."));
             platform::DeviceContextPool &pool =
                 platform::DeviceContextPool::Instance();
             auto *dev_ctx = pool.Get(platform::CUDAPlace(device_id));
             VLOG(4) << "Init the DeviceContext, and the place is "
                     << dev_ctx->GetPlace();
             auto *self_tensor =
                 self->MutableVar()->GetMutable<framework::LoDTensor>();
             // Register the CPU memory as CUDA page-locked (pinned) host
             // memory so the GPU can address it directly.
             const auto &data_numel = self_tensor->numel();
             const size_t &need_allocate_size =
                 data_numel * framework::SizeOfType(self_tensor->type());
             void *data_ptr = self_tensor->data();
             auto result = cudaHostRegister(data_ptr, need_allocate_size,
                                            cudaHostRegisterDefault);
             if (cudaSuccess != result) {
               VLOG(4) << "UVA (unified virtual addressing) failed to "
                       << "register " << need_allocate_size
                       << " bytes, error code: " << result;
             }
             // Get the device pointer that aliases the registered host
             // memory from cudaHostGetDevicePointer.
             void *cuda_device_pointer = nullptr;
             cudaHostGetDevicePointer(
                 reinterpret_cast<void **>(&cuda_device_pointer),
                 reinterpret_cast<void *>(data_ptr), 0);
             // Reset the tensor's holder with the device pointer, so the
             // tensor reports a CUDA place without any data copy.
             std::shared_ptr<memory::allocation::Allocation> holder =
                 std::make_shared<memory::allocation::Allocation>(
                     cuda_device_pointer, need_allocate_size,
                     platform::CUDAPlace(device_id));
             self_tensor->ResetHolderWithType(holder, self_tensor->type());
           },
py::arg("device_id") = 0, py::return_value_policy::reference, R"DOC(
Returns self tensor with the UVA(unified virtual addressing).
Args:
device_id(int, optional): The destination GPU device id. Default: None, means current device.
Examples:
.. code-block:: python
# required: gpu
import paddle
x = paddle.to_tensor([1, 2, 3], place=paddle.CPUPlace())
x._uva()
print(x)
)DOC")
#endif
.def("copy_", &imperative::VarBase::CopyFrom)
.def("_copy_to",
[](const std::shared_ptr<imperative::VarBase> &self,
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np

import paddle


class TestTensorUVA(unittest.TestCase):
    def test_uva(self):
        if paddle.fluid.core.is_compiled_with_cuda():
            place = paddle.CPUPlace()
            np_value = np.random.random(size=[10, 30]).astype('float32')
            tensor = paddle.to_tensor(np_value, place=place)
            tensor._uva()
            self.assertTrue(tensor.place.is_gpu_place())


if __name__ == "__main__":
    unittest.main()
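For context, a small usage sketch matching what the test asserts (assumes a CUDA build of Paddle; the arithmetic at the end is only illustrative of downstream GPU consumption):

import paddle

if paddle.fluid.core.is_compiled_with_cuda():
    x = paddle.to_tensor([1.0, 2.0, 3.0], place=paddle.CPUPlace())
    x._uva()        # register and alias the host memory; no data copy
    print(x.place)  # now reports a GPU place, e.g. CUDAPlace(0)
    y = x + 1       # GPU kernels can consume x directly
    print(y.numpy())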