diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index d67d414b7dda8de83b7c3e129eeb2c4edf996183..691189b2e4cc3ef75a7d5d72cb8f7e555c1c3769 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -1108,6 +1108,48 @@ void TensorFromDLPack(const ::DLTensor& dl_tensor, phi::DenseTensor* dst) { #endif } +void TensorFromDLPack(const DLManagedTensor* src, phi::DenseTensor* dst) { + std::vector vec; + std::copy(src->dl_tensor.shape, + src->dl_tensor.shape + src->dl_tensor.ndim, + std::back_inserter(vec)); + + framework::DDim vddim = phi::make_ddim(vec); + dst->Resize(vddim); + ::DLDataType type = src->dl_tensor.dtype; + + auto src_ptr = static_cast(src->dl_tensor.data); + auto size = phi::product(vddim) * type.bits / 8; + + if (src->dl_tensor.device.device_type == kDLCPU) { + platform::CPUPlace dst_place = platform::CPUPlace(); + platform::CPUPlace src_place = platform::CPUPlace(); + void* dst_ptr = GetDstPtrByDLDataType(type, dst, dst_place); + memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size); + } +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + if (src->dl_tensor.device.device_type == kDLGPU) { + platform::CUDAPlace dst_place = + platform::CUDAPlace(src->dl_tensor.device.device_id); + platform::CUDAPlace src_place = + platform::CUDAPlace(src->dl_tensor.device.device_id); + void* dst_ptr = GetDstPtrByDLDataType(type, dst, dst_place); + auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(dst_place); + // Fix copy by share allocation. + memory::Copy(dst_place, + dst_ptr, + src_place, + src_ptr, + size, + reinterpret_cast(*ctx).stream()); + } +#endif + src->deleter(const_cast(src)); +#ifdef PADDLE_WITH_XPU + PADDLE_THROW(platform::errors::Unimplemented("XPUPlace is not supported")); +#endif +} + template std::string format_tensor(const phi::DenseTensor& tensor) { // TODO(zhiqiu): use the print option to format tensor. 
def test_dlpack_deletion(self):
    """Round-trip large tensors through DLPack repeatedly on CUDA builds.

    Regression test for Paddle issue 47171. Each iteration creates a
    float32 tensor of 1024 * 128 * 1024 elements, exports it to a DLPack
    capsule and imports it back; the test has no assertions and passes as
    long as all 80 round trips complete. It does nothing when Paddle is
    not compiled with CUDA.
    """
    if not paddle.is_compiled_with_cuda():
        return

    rows, cols = 1024 * 128, 1024
    for _ in range(80):
        source = paddle.rand(shape=[rows, cols], dtype="float32")
        capsule = paddle.utils.dlpack.to_dlpack(source)
        restored = paddle.utils.dlpack.from_dlpack(capsule)
TestRaiseError(unittest.TestCase): def func_test_from_dlpack_raise_type_error(self):