Unverified commit bfacd706, authored by W wawltor, committed by GitHub

add the uva function for the Tensor (#38950)

* add the uva api for the tensor

* fix the compiler problem for the uva

* fix the example for the _uva

* fix the compile problem in the pten library

* update the environment support for the uva

* use make_shared to replace the shared_ptr
Parent df898f8b
...@@ -1870,6 +1870,61 @@ void BindImperative(py::module *m_ptr) {
#endif
},
py::return_value_policy::reference)
#if defined(PADDLE_WITH_CUDA)
.def("_uva",
[](const std::shared_ptr<imperative::VarBase> &self, int device_id) {
PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->Place()), true,
platform::errors::InvalidArgument(
"Unified virtual addressing only support "
"CPU Tensor currently."));
platform::DeviceContextPool &pool =
platform::DeviceContextPool::Instance();
auto *dev_ctx = pool.Get(platform::CUDAPlace(device_id));
VLOG(4) << "Init the DeviceContext, and the place is "
<< dev_ctx->GetPlace();
auto *self_tensor =
self->MutableVar()->GetMutable<framework::LoDTensor>();
// Register the CPU memory as page-locked (pinned) CUDA host memory
const auto data_numel = self_tensor->numel();
const size_t need_allocate_size =
data_numel * framework::SizeOfType(self_tensor->type());
void *data_ptr = self_tensor->data();
auto result = cudaHostRegister(data_ptr, need_allocate_size,
cudaHostRegisterDefault);
PADDLE_ENFORCE_EQ(
result, cudaSuccess,
platform::errors::External(
"cudaHostRegister failed to register %d bytes of CPU memory "
"for UVA (unified virtual addressing), CUDA error code: %d.",
need_allocate_size, result));
// Obtain the device pointer that aliases the registered host memory
void *cuda_device_pointer = nullptr;
cudaHostGetDevicePointer(&cuda_device_pointer, data_ptr, 0);
// Reset the tensor holder with the mapped device pointer
std::shared_ptr<memory::allocation::Allocation> holder =
std::make_shared<memory::allocation::Allocation>(
cuda_device_pointer, need_allocate_size,
platform::CUDAPlace(device_id));
self_tensor->ResetHolderWithType(holder, self_tensor->type());
},
py::arg("device_id") = 0, py::return_value_policy::reference, R"DOC(
Returns self tensor with the UVA(unified virtual addressing).
Args:
device_id(int, optional): The destination GPU device id. Default: None, means current device.
Examples:
.. code-block:: python
# required: gpu
import paddle
x = paddle.to_tensor([1, 2, 3], place=paddle.CPUPlace())
x._uva()
print(x)
)DOC")
#endif
.def("copy_", &imperative::VarBase::CopyFrom) .def("copy_", &imperative::VarBase::CopyFrom)
.def("_copy_to", .def("_copy_to",
[](const std::shared_ptr<imperative::VarBase> &self, [](const std::shared_ptr<imperative::VarBase> &self,
......
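
For readers unfamiliar with the pattern in the binding above: it relies on two CUDA runtime calls, cudaHostRegister to page-lock existing host memory and cudaHostGetDevicePointer to obtain a device-side alias for it. The minimal standalone sketch below (illustrative only, not part of this PR; the file name, scale kernel, and sizes are made up) shows that after registration a kernel can read and write the host allocation directly, with no cudaMemcpy.

// uva_sketch.cu -- hypothetical standalone example of the
// cudaHostRegister / cudaHostGetDevicePointer pattern used by _uva.
#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

__global__ void scale(float *data, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= 2.0f;
}

int main() {
  const int n = 1 << 20;
  const size_t bytes = n * sizeof(float);
  float *host = static_cast<float *>(malloc(bytes));
  for (int i = 0; i < n; ++i) host[i] = 1.0f;

  // Page-lock the existing allocation and map it into the device
  // address space; the data itself is never copied.
  if (cudaHostRegister(host, bytes, cudaHostRegisterDefault) != cudaSuccess) {
    fprintf(stderr, "cudaHostRegister failed\n");
    return 1;
  }
  float *dev = nullptr;
  cudaHostGetDevicePointer(reinterpret_cast<void **>(&dev), host, 0);

  // The kernel operates on the device alias of the host buffer.
  scale<<<(n + 255) / 256, 256>>>(dev, n);
  cudaDeviceSynchronize();

  printf("host[0] = %f\n", host[0]);  // 2.0: the kernel wrote through to host
  cudaHostUnregister(host);
  free(host);
  return 0;
}

The _uva binding does the same thing, then wraps the mapped pointer in an Allocation placed on CUDAPlace(device_id) so the LoDTensor can be consumed as if it lived on the GPU.
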
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import numpy as np
import paddle


class TestTensorCopyFrom(unittest.TestCase):
    def test_main(self):
        if paddle.fluid.core.is_compiled_with_cuda():
            place = paddle.CPUPlace()
            np_value = np.random.random(size=[10, 30]).astype('float32')
            tensor = paddle.to_tensor(np_value, place=place)
            # After _uva() the tensor reports a GPU place, while its data
            # stays in (now page-locked) host memory.
            tensor._uva()
            self.assertTrue(tensor.place.is_gpu_place())


if __name__ == "__main__":
    unittest.main()