未验证 提交 2753c16f 编写于 作者: A Aurelius84 提交者: GitHub

[Phi] Add ClearHolder when re-alloc on new place in DeviceContext (#39833)

* [Phi] Add ClearHolder when re-alloc on new place in DeviceContext

* fix hostAlloc

* fix inferRT unittest

* remove dev_ctx ptr
上级 282e09dc
...@@ -16,13 +16,13 @@ cc_library(tensor_base SRCS tensor_base.cc allocator.cc DEPS phi_enforce) ...@@ -16,13 +16,13 @@ cc_library(tensor_base SRCS tensor_base.cc allocator.cc DEPS phi_enforce)
cc_library(tensor_meta SRCS tensor_meta.cc DEPS phi_enforce) cc_library(tensor_meta SRCS tensor_meta.cc DEPS phi_enforce)
cc_library(lod_utils SRCS lod_utils.cc DEPS phi_enforce) cc_library(lod_utils SRCS lod_utils.cc DEPS phi_enforce)
cc_library(phi_device_context SRCS device_context.cc DEPS tensor_base)
cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base) cc_library(dense_tensor SRCS dense_tensor.cc dense_tensor_impl.cc DEPS fluid_convert_utils tensor_meta tensor_base)
cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base) cc_library(sparse_coo_tensor SRCS sparse_coo_tensor.cc DEPS tensor_meta tensor_base)
cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base) cc_library(sparse_csr_tensor SRCS sparse_csr_tensor.cc DEPS dense_tensor tensor_base)
cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor) cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor)
cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS meta_tensor) cc_library(infermeta_utils SRCS infermeta_utils.cc DEPS meta_tensor)
cc_library(phi_device_context SRCS device_context.cc DEPS dense_tensor selected_rows)
cc_library(selected_rows SRCS selected_rows_impl.cc DEPS dense_tensor phi_enforce ddim memcpy) cc_library(selected_rows SRCS selected_rows_impl.cc DEPS dense_tensor phi_enforce ddim memcpy)
cc_library(phi_custom_kernel SRCS custom_kernel.cc DEPS kernel_factory convert_utils) cc_library(phi_custom_kernel SRCS custom_kernel.cc DEPS kernel_factory convert_utils)
......
...@@ -94,9 +94,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator, ...@@ -94,9 +94,9 @@ void* DenseTensor::AllocateFrom(Allocator* allocator,
bytes)); bytes));
bytes = requested_size; bytes = requested_size;
} }
// TODO(paddle-dev): In case of the allocator of storage_ is different with // NOTE(paddle-dev): In case of the allocator of storage_ is different with
// the incoming allocator, we should re-alloc data using the incoming // the incoming allocator, we will re-alloc data using the incoming
// allocator. // allocator. See DeviceContext.Alloc in core/device_context.cc.
if (!holder_ || holder_->size() < bytes + meta_.offset) { if (!holder_ || holder_->size() < bytes + meta_.offset) {
meta_.offset = 0; meta_.offset = 0;
VLOG(10) << "Allocate data with bytes: " << bytes; VLOG(10) << "Allocate data with bytes: " << bytes;
......
...@@ -13,8 +13,9 @@ ...@@ -13,8 +13,9 @@
// limitations under the License. // limitations under the License.
#include "paddle/phi/core/device_context.h" #include "paddle/phi/core/device_context.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/tensor_base.h" #include "paddle/phi/core/selected_rows.h"
namespace phi { namespace phi {
using DataType = paddle::experimental::DataType; using DataType = paddle::experimental::DataType;
...@@ -72,6 +73,7 @@ struct DeviceContext::Impl { ...@@ -72,6 +73,7 @@ struct DeviceContext::Impl {
} }
void* Alloc(TensorBase* tensor, void* Alloc(TensorBase* tensor,
const Place& place,
DataType dtype = DataType::UNDEFINED, DataType dtype = DataType::UNDEFINED,
size_t requested_size = 0) const { size_t requested_size = 0) const {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
...@@ -81,6 +83,12 @@ struct DeviceContext::Impl { ...@@ -81,6 +83,12 @@ struct DeviceContext::Impl {
if (dtype == DataType::UNDEFINED) { if (dtype == DataType::UNDEFINED) {
dtype = tensor->dtype(); dtype = tensor->dtype();
} }
// NOTE(paddle-dev): If the tensor already holds an allocation and is about
// to be allocated on a different place, we clear its holder first and then
// re-allocate it.
if (tensor->initialized() && tensor->place() != place) {
ClearHolder(tensor);
}
auto* allocator = auto* allocator =
tensor->numel() == 0 ? zero_allocator_ : device_allocator_; tensor->numel() == 0 ? zero_allocator_ : device_allocator_;
return tensor->AllocateFrom( return tensor->AllocateFrom(
...@@ -88,9 +96,11 @@ struct DeviceContext::Impl { ...@@ -88,9 +96,11 @@ struct DeviceContext::Impl {
} }
template <typename T> template <typename T>
T* Alloc(TensorBase* tensor, size_t requested_size = 0) const { T* Alloc(TensorBase* tensor,
const Place& place,
size_t requested_size = 0) const {
DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type(); DataType dtype = paddle::experimental::CppTypeToDataType<T>::Type();
return static_cast<T*>(Alloc(tensor, dtype, requested_size)); return static_cast<T*>(Alloc(tensor, place, dtype, requested_size));
} }
void* HostAlloc(TensorBase* tensor, void* HostAlloc(TensorBase* tensor,
...@@ -103,6 +113,9 @@ struct DeviceContext::Impl { ...@@ -103,6 +113,9 @@ struct DeviceContext::Impl {
if (dtype == DataType::UNDEFINED) { if (dtype == DataType::UNDEFINED) {
dtype = tensor->dtype(); dtype = tensor->dtype();
} }
if (tensor->initialized() && tensor->place() != CPUPlace()) {
ClearHolder(tensor);
}
auto* allocator = tensor->numel() == 0 ? zero_allocator_ : host_allocator_; auto* allocator = tensor->numel() == 0 ? zero_allocator_ : host_allocator_;
return tensor->AllocateFrom( return tensor->AllocateFrom(
const_cast<Allocator*>(allocator), dtype, requested_size); const_cast<Allocator*>(allocator), dtype, requested_size);
...@@ -147,6 +160,19 @@ struct DeviceContext::Impl { ...@@ -147,6 +160,19 @@ struct DeviceContext::Impl {
} }
private: private:
// Clears the storage of `tensor` before it is re-allocated elsewhere.
//
// Does nothing when `tensor->initialized()` is false. Otherwise dispatches on
// the concrete tensor type:
//   * DenseTensor  -> calls clear() on the tensor itself;
//   * SelectedRows -> calls clear() on the object returned by mutable_value().
// Any other TensorBase subtype raises errors::Unimplemented.
void ClearHolder(TensorBase* tensor) const {
  if (tensor->initialized()) {
    if (DenseTensor::classof(tensor)) {
      static_cast<DenseTensor*>(tensor)->clear();
      return;
    }
    if (SelectedRows::classof(tensor)) {
      // SelectedRows is cleared through its value accessor rather than
      // directly on the SelectedRows object.
      static_cast<SelectedRows*>(tensor)->mutable_value()->clear();
      return;
    }
    // Neither supported type matched: report it instead of silently
    // leaving the old allocation in place.
    PADDLE_THROW(errors::Unimplemented(
        "Only support DenseTensor and SelectedRows now."));
  }
}
const Allocator* device_allocator_{nullptr}; const Allocator* device_allocator_{nullptr};
const Allocator* host_allocator_{nullptr}; const Allocator* host_allocator_{nullptr};
const Allocator* zero_allocator_{nullptr}; const Allocator* zero_allocator_{nullptr};
...@@ -168,7 +194,7 @@ DeviceContext::DeviceContext(DeviceContext&& other) { ...@@ -168,7 +194,7 @@ DeviceContext::DeviceContext(DeviceContext&& other) {
impl_ = std::move(other.impl_); impl_ = std::move(other.impl_);
} }
DeviceContext& DeviceContext::operator=(DeviceContext&&) = default; DeviceContext& DeviceContext::operator=(DeviceContext&& other) = default;
DeviceContext::~DeviceContext() = default; DeviceContext::~DeviceContext() = default;
...@@ -199,12 +225,12 @@ const Allocator& DeviceContext::GetZeroAllocator() const { ...@@ -199,12 +225,12 @@ const Allocator& DeviceContext::GetZeroAllocator() const {
void* DeviceContext::Alloc(TensorBase* tensor, void* DeviceContext::Alloc(TensorBase* tensor,
DataType dtype, DataType dtype,
size_t requested_size) const { size_t requested_size) const {
return impl_->Alloc(tensor, dtype, requested_size); return impl_->Alloc(tensor, GetPlace(), dtype, requested_size);
} }
template <typename T> template <typename T>
T* DeviceContext::Alloc(TensorBase* tensor, size_t requested_size) const { T* DeviceContext::Alloc(TensorBase* tensor, size_t requested_size) const {
return impl_->Alloc<T>(tensor, requested_size); return impl_->Alloc<T>(tensor, GetPlace(), requested_size);
} }
void* DeviceContext::HostAlloc(TensorBase* tensor, void* DeviceContext::HostAlloc(TensorBase* tensor,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册