diff --git a/paddle/phi/core/CMakeLists.txt b/paddle/phi/core/CMakeLists.txt index d3c206c99dc225f240c0d0af75b843d94195a0b2..f4f57a0acbbb386a3642a05e0d0dc70cd082a4d8 100644 --- a/paddle/phi/core/CMakeLists.txt +++ b/paddle/phi/core/CMakeLists.txt @@ -16,13 +16,13 @@ cc_library(tensor_base SRCS tensor_base.cc allocator.cc DEPS phi_enforce) cc_library(tensor_meta SRCS tensor_meta.cc DEPS phi_enforce) cc_library(lod_utils SRCS lod_utils.cc DEPS phi_enforce) -cc_library(phi_device_context SRCS device_context.cc DEPS tensor_base) cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base) cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base) cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base) cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor) cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS meta_tensor) +cc_library(phi_device_context SRCS device_context.cc DEPS dense_tensor selected_rows) cc_library(selected_rows SRCS selected_rows_impl.cc DEPS dense_tensor phi_enforce ddim memcpy) cc_library(phi_custom_kernel SRCS custom_kernel.cc DEPS kernel_factory convert_utils) diff --git a/paddle/phi/core/dense_tensor.cc b/paddle/phi/core/dense_tensor.cc index 44cb63e2b874bd2df9b034ecf9f03053d1888c94..7a0f50533360d71e8cd025a520d753c366c08edb 100644 --- a/paddle/phi/core/dense_tensor.cc +++ b/paddle/phi/core/dense_tensor.cc @@ -94,9 +94,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, bytes)); bytes = requested_size; } - // TODO(paddle-dev): In case of the allocator of storage_ is different with - // the incoming allocator, we should re-alloc data using the incoming - // allocator. + // NOTE(paddle-dev): In case of the allocator of storage_ is different with + // the incoming allocator, we will re-alloc data using the incoming + // allocator. See DeviceContext.Alloc in core/device_context.cc. 
if (!holder_ || holder_->size() < bytes + meta_.offset) { meta_.offset = 0; VLOG(10) << "Allocate data with bytes: " << bytes; diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc index 9c1d85251f8926141341ee6b8c15e29164894ee7..b139eb99dd4846adb3f7ef3a27507a2ca4478e6d 100644 --- a/paddle/phi/core/device_context.cc +++ b/paddle/phi/core/device_context.cc @@ -13,8 +13,9 @@ // limitations under the License. #include "paddle/phi/core/device_context.h" +#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/tensor_base.h" +#include "paddle/phi/core/selected_rows.h" namespace phi { using DataType = paddle::experimental::DataType; @@ -72,6 +73,7 @@ struct DeviceContext::Impl { } void* Alloc(TensorBase* tensor, + const Place& place, DataType dtype = DataType::UNDEFINED, size_t requested_size = 0) const { PADDLE_ENFORCE_NOT_NULL( @@ -81,6 +83,12 @@ struct DeviceContext::Impl { if (dtype == DataType::UNDEFINED) { dtype = tensor->dtype(); } + // NOTE(paddle-dev): In case of tensor has already hold allocation and + // is going to allocate allocation on new place, we will clear its holder + // firstly and then re-alloc it. + if (tensor->initialized() && tensor->place() != place) { + ClearHolder(tensor); + } auto* allocator = tensor->numel() == 0 ? 
zero_allocator_ : device_allocator_; return tensor->AllocateFrom( @@ -88,9 +96,11 @@ struct DeviceContext::Impl { } template <typename T> - T* Alloc(TensorBase* tensor, size_t requested_size = 0) const { + T* Alloc(TensorBase* tensor, + const Place& place, + size_t requested_size = 0) const { DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type(); - return static_cast<T*>(Alloc(tensor, dtype, requested_size)); + return static_cast<T*>(Alloc(tensor, place, dtype, requested_size)); } void* HostAlloc(TensorBase* tensor, @@ -103,6 +113,9 @@ struct DeviceContext::Impl { if (dtype == DataType::UNDEFINED) { dtype = tensor->dtype(); } + if (tensor->initialized() && tensor->place() != CPUPlace()) { + ClearHolder(tensor); + } auto* allocator = tensor->numel() == 0 ? zero_allocator_ : host_allocator_; return tensor->AllocateFrom( const_cast<Allocator*>(allocator), dtype, requested_size); @@ -147,6 +160,19 @@ struct DeviceContext::Impl { } private: + void ClearHolder(TensorBase* tensor) const { + if (!tensor->initialized()) return; + + if (DenseTensor::classof(tensor)) { + static_cast<DenseTensor*>(tensor)->clear(); + } else if (SelectedRows::classof(tensor)) { + static_cast<SelectedRows*>(tensor)->mutable_value()->clear(); + } else { + PADDLE_THROW(errors::Unimplemented( + "Only support DenseTensor and SelectedRows now.")); + } + } + const Allocator* device_allocator_{nullptr}; const Allocator* host_allocator_{nullptr}; const Allocator* zero_allocator_{nullptr}; @@ -168,7 +194,7 @@ DeviceContext::DeviceContext(DeviceContext&& other) { impl_ = std::move(other.impl_); } -DeviceContext& DeviceContext::operator=(DeviceContext&&) = default; +DeviceContext& DeviceContext::operator=(DeviceContext&& other) = default; DeviceContext::~DeviceContext() = default; @@ -199,12 +225,12 @@ const Allocator& DeviceContext::GetZeroAllocator() const { void* DeviceContext::Alloc(TensorBase* tensor, DataType dtype, size_t requested_size) const { - return impl_->Alloc(tensor, dtype, requested_size); + return impl_->Alloc(tensor, GetPlace(), dtype, 
requested_size); } template <typename T> T* DeviceContext::Alloc(TensorBase* tensor, size_t requested_size) const { - return impl_->Alloc<T>(tensor, requested_size); + return impl_->Alloc<T>(tensor, GetPlace(), requested_size); } void* DeviceContext::HostAlloc(TensorBase* tensor,