diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc
index 3650b44ed0a85bd498ff28642660aeb2fefa4ee3..d5ec5ee9c2fc1e1688810aa0a6ad9d43537a2fa4 100644
--- a/paddle/fluid/pybind/imperative.cc
+++ b/paddle/fluid/pybind/imperative.cc
@@ -1870,6 +1870,61 @@ void BindImperative(py::module *m_ptr) {
 #endif
            },
            py::return_value_policy::reference)
+#if defined(PADDLE_WITH_CUDA)
+      .def("_uva",
+           [](const std::shared_ptr<imperative::VarBase> &self, int device_id) {
+             PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->Place()), true,
+                               platform::errors::InvalidArgument(
+                                   "Unified virtual addressing only supports "
+                                   "CPU Tensor currently."));
+             platform::DeviceContextPool &pool =
+                 platform::DeviceContextPool::Instance();
+             auto *dev_ctx = pool.Get(platform::CUDAPlace(device_id));
+             VLOG(4) << "Init the DeviceContext, and the place is "
+                     << dev_ctx->GetPlace();
+             auto *self_tensor =
+                 self->MutableVar()->GetMutable<framework::LoDTensor>();
+             // Register the cpu memory as the cuda host memory
+             const auto &data_numel = self_tensor->numel();
+             const size_t &need_allocate_size =
+                 data_numel * framework::SizeOfType(self_tensor->type());
+             void *data_ptr = self_tensor->data();
+             auto result = cudaHostRegister(data_ptr, need_allocate_size,
+                                            cudaHostRegisterDefault);
+             if (cudaSuccess != result) {
+               VLOG(4) << "UVA(unified virtual addressing) failed allocate:"
+                       << need_allocate_size << ", the error code:" << result;
+             }
+
+             // Get device pointer from the function of cudaHostGetDevicePointer
+             void *cuda_device_pointer = nullptr;
+             cudaHostGetDevicePointer(
+                 reinterpret_cast<void **>(&cuda_device_pointer),
+                 reinterpret_cast<void *>(data_ptr), 0);
+
+             // Reset the memory with device pointer
+             std::shared_ptr<memory::allocation::Allocation> holder =
+                 std::make_shared<memory::allocation::Allocation>(
+                     cuda_device_pointer, need_allocate_size,
+                     platform::CUDAPlace(device_id));
+             self_tensor->ResetHolderWithType(holder, self_tensor->type());
+           },
+           py::arg("device_id") = 0, py::return_value_policy::reference, R"DOC(
+        Returns self tensor with the UVA(unified virtual addressing).
+
+        Args:
+            device_id(int, optional): The destination GPU device id. Default: 0.
+
+        Examples:
+            .. code-block:: python
+
+              # required: gpu
+              import paddle
+              x = paddle.to_tensor([1, 2, 3], place=paddle.CPUPlace())
+              x._uva()
+              print(x)
+       )DOC")
+#endif
       .def("copy_", &imperative::VarBase::CopyFrom)
       .def("_copy_to",
           [](const std::shared_ptr<imperative::VarBase> &self,
diff --git a/python/paddle/fluid/tests/unittests/test_tensor_uva.py b/python/paddle/fluid/tests/unittests/test_tensor_uva.py
new file mode 100644
index 0000000000000000000000000000000000000000..98895202c042ed2cd6b0f30079a65a1a6accfd72
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_tensor_uva.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle
+import unittest
+import numpy as np
+from paddle.fluid.core import LoDTensor as Tensor
+
+
+class TestTensorCopyFrom(unittest.TestCase):
+    def test_main(self):
+        if paddle.fluid.core.is_compiled_with_cuda():
+            place = paddle.CPUPlace()
+            np_value = np.random.random(size=[10, 30]).astype('float32')
+            tensor = paddle.to_tensor(np_value, place=place)
+            tensor._uva()
+            self.assertTrue(tensor.place.is_gpu_place())
+
+
+if __name__ == "__main__":
+    unittest.main()
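For reference, the binding above chains three CUDA runtime calls: `cudaHostRegister` pins the tensor's existing CPU buffer, `cudaHostGetDevicePointer` returns a device-visible alias of that buffer, and the tensor's holder is reset to the alias so subsequent GPU ops read the host memory directly, with no `cudaMemcpy` and no device allocation. The standalone sketch below (not Paddle code; the kernel name `scale`, buffer size `n`, and launch configuration are illustrative assumptions) shows the same zero-copy pattern in isolation, assuming a device that supports mapped pinned memory:

```cpp
// Minimal sketch of the UVA / zero-copy pattern used by `_uva` (assumptions noted above).
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

// Kernel that operates on host memory through its device-visible alias.
__global__ void scale(float *data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= 2.0f;
}

int main() {
  const int n = 1024;
  const size_t bytes = n * sizeof(float);
  float *host = static_cast<float *>(malloc(bytes));
  for (int i = 0; i < n; ++i) host[i] = 1.0f;

  // Step 1: page-lock and register the existing host allocation.
  if (cudaHostRegister(host, bytes, cudaHostRegisterDefault) != cudaSuccess) {
    fprintf(stderr, "cudaHostRegister failed\n");
    return 1;
  }

  // Step 2: obtain the device pointer that aliases the registered host memory.
  float *device_alias = nullptr;
  cudaHostGetDevicePointer(reinterpret_cast<void **>(&device_alias), host, 0);

  // Step 3: launch a kernel on the alias; it reads/writes the host buffer in place.
  scale<<<(n + 255) / 256, 256>>>(device_alias, n);
  cudaDeviceSynchronize();

  printf("host[0] after kernel: %f\n", host[0]);  // expected: 2.0

  cudaHostUnregister(host);
  free(host);
  return 0;
}
```

This also explains why the unit test only asserts `tensor.place.is_gpu_place()`: after `_uva()` the data still physically lives in host RAM, but the holder's place is `CUDAPlace`, so the tensor is treated as GPU-resident while kernels access it over the interconnect.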