未验证 提交 dfdc9960 编写于 作者: Z Zhanlue Yang 提交者: GitHub

[Unify Tensors PR #3]Port framework::Tensor members & interfaces to...

[Unify Tensors PR #3]Port framework::Tensor members & interfaces to pten::DenseTensor, test=allcases (#38473)

* Added shared_ptr<Allocation> member & corresponding interfaces to Storage

* Removed original pten::Allocation from Storage and adjusted the interfaces accordingly

* Fixed issues with storage offset

* Used place to malloc allocation for TensorStorage

* [Unify Tensors PR #3]Ported framework::Tensor interfaces to pten::DenseTensor

* Fixed issues with place

* Added comments

* Moved mutable_data with stream argument to DenseTensor

* Added set_offset interface

* Fixed CI issues,test=allcases

* [Unify Tensors PR #4] Port LoDTensor interfaces to DenseTensor

* Reverted changes to pten_layout() interface

* Removed friend classes
上级 a7b13d38
...@@ -68,15 +68,15 @@ cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor) ...@@ -68,15 +68,15 @@ cc_test(data_type_test SRCS data_type_test.cc DEPS data_type place tensor)
if(WITH_GPU) if(WITH_GPU)
if (WIN32) if (WIN32)
windows_symbolic(tensor_util SRCS tensor_util.cu) windows_symbolic(tensor_util SRCS tensor_util.cu)
nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context) nv_library(tensor SRCS tensor.cc .tensor_util.cu DEPS place memory data_type device_context dense_tensor)
add_dependencies(tensor tensor_util) add_dependencies(tensor tensor_util)
else() else()
nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler) nv_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
endif(WIN32) endif(WIN32)
elseif(WITH_ROCM) elseif(WITH_ROCM)
hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler) hip_library(tensor SRCS tensor.cc tensor_util.cu DEPS place memory data_type device_context profiler dense_tensor)
else() else()
cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler) cc_library(tensor SRCS tensor.cc tensor_util.cc DEPS place memory data_type device_context profiler dense_tensor)
endif() endif()
cc_test(tensor_test SRCS tensor_test.cc DEPS tensor) cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
......
...@@ -120,6 +120,8 @@ class Tensor { ...@@ -120,6 +120,8 @@ class Tensor {
explicit Tensor(const proto::VarType::Type&); explicit Tensor(const proto::VarType::Type&);
/*! Return a pointer to mutable memory block. */ /*! Return a pointer to mutable memory block. */
const void* data() const;
template <typename T> template <typename T>
T* data(); T* data();
......
...@@ -54,6 +54,12 @@ inline T* Tensor::data() { ...@@ -54,6 +54,12 @@ inline T* Tensor::data() {
offset_); offset_);
} }
/*! Return an untyped, read-only pointer to this tensor's data: the holder's
 *  base address advanced by offset_ bytes. */
inline const void* Tensor::data() const {
  // check_memory_size() runs before holder_ is dereferenced below.
  check_memory_size();
  const uintptr_t base_addr = reinterpret_cast<uintptr_t>(holder_->ptr());
  return reinterpret_cast<const void*>(base_addr + offset_);
}
template <typename T> template <typename T>
inline T* Tensor::mutable_data(const DDim& dims, const platform::Place& place, inline T* Tensor::mutable_data(const DDim& dims, const platform::Place& place,
size_t requested_size) { size_t requested_size) {
......
...@@ -23,6 +23,9 @@ limitations under the License. */ ...@@ -23,6 +23,9 @@ limitations under the License. */
#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler.h"
#include "paddle/pten/core/dense_tensor.h"
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
#include "dnnl_debug.h" // NOLINT #include "dnnl_debug.h" // NOLINT
#endif #endif
...@@ -30,11 +33,12 @@ limitations under the License. */ ...@@ -30,11 +33,12 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace framework { namespace framework {
void TensorCopy(const Tensor& src, const platform::Place& dst_place, template <typename TENSOR>
const platform::DeviceContext& ctx, Tensor* dst) { void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, TENSOR* dst) {
if (&src == dst) { if (&src == dst) {
auto src_copy = src; auto src_copy = src;
TensorCopy(src_copy, dst_place, ctx, dst); TensorCopyImpl(src_copy, dst_place, ctx, dst);
return; return;
} }
...@@ -45,7 +49,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -45,7 +49,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
dst->Resize(src.dims()); dst->Resize(src.dims());
dst->set_layout(src.layout()); dst->set_layout(src.layout());
auto src_place = src.place(); auto src_place = src.place();
auto src_ptr = src.data<void>(); auto src_ptr = src.data();
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
dst->set_format(src.format()); dst->set_format(src.format());
// oneDNN tensors due to padding may be of bigger size // oneDNN tensors due to padding may be of bigger size
...@@ -389,8 +393,9 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -389,8 +393,9 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
#endif #endif
} }
void TensorCopy(const Tensor& src, const platform::Place& dst_place, template <typename TENSOR>
Tensor* dst) { void TensorCopyImpl(const TENSOR& src, const platform::Place& dst_place,
TENSOR* dst) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext* dev_ctx; const platform::DeviceContext* dev_ctx;
if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place)) { if (platform::is_gpu_place(dst_place) || platform::is_npu_place(dst_place)) {
...@@ -398,7 +403,24 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -398,7 +403,24 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
} else { } else {
dev_ctx = pool.Get(src.place()); dev_ctx = pool.Get(src.place());
} }
TensorCopy(src, dst_place, *dev_ctx, dst); TensorCopyImpl(src, dst_place, *dev_ctx, dst);
}
// Copies `src` to `dst` on `dst_place` without an explicit device context;
// forwards to the three-argument TensorCopyImpl with TENSOR deduced as Tensor.
void TensorCopy(const Tensor& src,
                const platform::Place& dst_place,
                Tensor* dst) {
  TensorCopyImpl(src, dst_place, dst);
}
// pten::DenseTensor flavour of the context-less copy; forwards to the
// three-argument TensorCopyImpl with TENSOR deduced as pten::DenseTensor.
void TensorCopy(const pten::DenseTensor& src,
                const platform::Place& dst_place,
                pten::DenseTensor* dst) {
  TensorCopyImpl(src, dst_place, dst);
}
// Copies `src` to `dst` on `dst_place` using the caller-supplied device
// context; forwards to the four-argument TensorCopyImpl with TENSOR deduced
// as Tensor.
void TensorCopy(const Tensor& src,
                const platform::Place& dst_place,
                const platform::DeviceContext& ctx,
                Tensor* dst) {
  TensorCopyImpl(src, dst_place, ctx, dst);
}
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, pten::DenseTensor* dst) {
TensorCopyImpl<pten::DenseTensor>(src, dst_place, ctx, dst);
} }
void TensorCopySync(const Tensor& src, const platform::Place& dst_place, void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
...@@ -418,7 +440,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -418,7 +440,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
dst->set_format(src.format()); dst->set_format(src.format());
#endif #endif
auto src_place = src.place(); auto src_place = src.place();
auto src_ptr = src.data<void>(); auto src_ptr = src.data();
auto dst_ptr = dst->mutable_data(dst_place, src.type()); auto dst_ptr = dst->mutable_data(dst_place, src.type());
if (src_ptr == dst_ptr && src_place == dst_place) { if (src_ptr == dst_ptr && src_place == dst_place) {
...@@ -971,7 +993,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor, ...@@ -971,7 +993,7 @@ void TensorToStream(std::ostream& os, const Tensor& tensor,
{ // the 3rd field, tensor data { // the 3rd field, tensor data
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type()); uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());
auto* data_ptr = tensor.data<void>(); auto* data_ptr = tensor.data();
PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(), PADDLE_ENFORCE_LT(size, (std::numeric_limits<std::streamsize>::max)(),
platform::errors::ResourceExhausted( platform::errors::ResourceExhausted(
"tensor size %d overflow when writing tensor", size)); "tensor size %d overflow when writing tensor", size));
......
...@@ -34,6 +34,8 @@ limitations under the License. */ ...@@ -34,6 +34,8 @@ limitations under the License. */
#include "paddle/fluid/platform/device/mlu/device_context.h" #include "paddle/fluid/platform/device/mlu/device_context.h"
#endif #endif
#include "paddle/pten/core/dense_tensor.h"
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -75,6 +77,8 @@ class Tensor; ...@@ -75,6 +77,8 @@ class Tensor;
void TensorCopy(const Tensor& src, const platform::Place& dst_place, void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst); const platform::DeviceContext& ctx, Tensor* dst);
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, pten::DenseTensor* dst);
// NOTE(zcd): If the src.place() and dst_place are two different GPU, // NOTE(zcd): If the src.place() and dst_place are two different GPU,
// the copy operation is carried out on the dst_place's stream. This is // the copy operation is carried out on the dst_place's stream. This is
...@@ -85,6 +89,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -85,6 +89,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
// not completed. // not completed.
void TensorCopy(const Tensor& src, const platform::Place& dst_place, void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst); Tensor* dst);
void TensorCopy(const pten::DenseTensor& src, const platform::Place& dst_place,
pten::DenseTensor* dst);
void TensorCopySync(const Tensor& src, const platform::Place& dst_place, void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst); Tensor* dst);
......
...@@ -9,6 +9,11 @@ endif() ...@@ -9,6 +9,11 @@ endif()
cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils) cc_library(kernel_factory SRCS kernel_factory.cc DEPS enforce convert_utils)
cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context) cc_library(kernel_context SRCS kernel_context.cc DEPS enforce pten_context)
cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce) cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS enforce)
cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector) cc_library(tensor_meta SRCS tensor_meta.cc DEPS enforce mixed_vector)
cc_library(dense_tensor SRCS dense_tensor.cc DEPS convert_utils tensor_meta tensor_base)
cc_library(dense_tensor SRCS dense_tensor.cc DEPS tensor_meta tensor_base) # Will remove once we implemented MKLDNN_Tensor
if(WITH_MKLDNN)
add_dependencies(dense_tensor mkldnn)
endif()
...@@ -19,6 +19,9 @@ limitations under the License. */ ...@@ -19,6 +19,9 @@ limitations under the License. */
#include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/complex.h"
#include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/float16.h"
#include "paddle/pten/api/lib/utils/storage.h"
#include "paddle/pten/core/convert_utils.h"
namespace pten { namespace pten {
DenseTensor::DenseTensor(const std::shared_ptr<Allocator>& a, DenseTensor::DenseTensor(const std::shared_ptr<Allocator>& a,
...@@ -41,6 +44,12 @@ DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta) ...@@ -41,6 +44,12 @@ DenseTensor::DenseTensor(intrusive_ptr<Storage> storage, DenseTensorMeta&& meta)
DenseTensor::DenseTensor(const DenseTensor& other) DenseTensor::DenseTensor(const DenseTensor& other)
: meta_(other.meta()), storage_(copy_intrusive(other.storage_)) {} : meta_(other.meta()), storage_(copy_intrusive(other.storage_)) {}
/// Shallow copy assignment: copies `other`'s meta information and takes the
/// result of copy_intrusive() on its storage.
/// NOTE(review): inplace_version_counter_ is not assigned here - confirm that
/// this is intended.
DenseTensor& DenseTensor::operator=(const DenseTensor& other) {
  meta_ = other.meta();
  // The call result is assigned directly; wrapping it in std::move adds
  // nothing (presumably copy_intrusive returns by value - TODO confirm).
  storage_ = copy_intrusive(other.storage_);
  return *this;
}
int64_t DenseTensor::numel() const { int64_t DenseTensor::numel() const {
if (meta_.is_scalar) { if (meta_.is_scalar) {
return 1; return 1;
...@@ -105,6 +114,7 @@ T* DenseTensor::mutable_data() { ...@@ -105,6 +114,7 @@ T* DenseTensor::mutable_data() {
template <typename T> template <typename T>
const T* DenseTensor::data() const { const T* DenseTensor::data() const {
check_memory_size();
PADDLE_ENFORCE( PADDLE_ENFORCE(
(dtype() == paddle::experimental::CppTypeToDataType<T>::Type()), (dtype() == paddle::experimental::CppTypeToDataType<T>::Type()),
paddle::platform::errors::InvalidArgument( paddle::platform::errors::InvalidArgument(
...@@ -113,7 +123,31 @@ const T* DenseTensor::data() const { ...@@ -113,7 +123,31 @@ const T* DenseTensor::data() const {
return static_cast<const T*>(data()); return static_cast<const T*>(data());
} }
/// Returns a typed, mutable pointer to the tensor's data.
///
/// Runs check_memory_size(), then requires that dtype() equals the data type
/// mapped from T and that storage_ is non-null before casting the untyped
/// data() pointer to T*.
template <typename T>
T* DenseTensor::data() {
  check_memory_size();

  const auto requested_dtype =
      paddle::experimental::CppTypeToDataType<T>::Type();
  PADDLE_ENFORCE(
      (dtype() == requested_dtype),
      paddle::platform::errors::InvalidArgument(
          "The type of data we are trying to retrieve does not match the "
          "type of data currently contained in the container."));

  PADDLE_ENFORCE_NOT_NULL(
      storage_,
      paddle::platform::errors::PreconditionNotMet(
          "The storage must be valid when call the mutable data function."));

  void* untyped = data();
  return reinterpret_cast<T*>(untyped);
}
const void* DenseTensor::data() const { const void* DenseTensor::data() const {
PADDLE_ENFORCE_NOT_NULL(
storage_,
paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function."));
return reinterpret_cast<const void*>(
reinterpret_cast<uintptr_t>(storage_->data()) + meta_.offset);
}
void* DenseTensor::data() {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
storage_, storage_,
paddle::platform::errors::PreconditionNotMet( paddle::platform::errors::PreconditionNotMet(
...@@ -130,16 +164,30 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) { ...@@ -130,16 +164,30 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
meta_ = std::move(meta); meta_ = std::move(meta);
} }
void DenseTensor::Resize(const DDim& dims) { /* @jim19930609: This interface will be further modified until we finalize the
design for Allocator - Allocation.
For now, we have to temporarily accommodate two independent use cases:
1. Designed behaviour: DenseTensor constructed with its underlying storage_
initialized
2. Legacy behaviour (fluid): DenseTensor constructed using the default
constructor, where
storage_ won't be initialized until the first
call to mutable_data(place)
*/
DenseTensor& DenseTensor::Resize(const DDim& dims) {
meta_.dims = dims; meta_.dims = dims;
if (storage_ != nullptr) {
mutable_data(); mutable_data();
}
return *this;
} }
void DenseTensor::ResetLoD(const LoD& lod) { meta_.lod = lod; } void DenseTensor::ResetLoD(const LoD& lod) { meta_.lod = lod; }
#define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \ #define DATA_MEMBER_FUNC_INSTANTIATION(dtype) \
template dtype* DenseTensor::mutable_data(); \ template dtype* DenseTensor::mutable_data(); \
template const dtype* DenseTensor::data() const; template const dtype* DenseTensor::data() const; \
template dtype* DenseTensor::data();
DATA_MEMBER_FUNC_INSTANTIATION(bool); DATA_MEMBER_FUNC_INSTANTIATION(bool);
DATA_MEMBER_FUNC_INSTANTIATION(int8_t); DATA_MEMBER_FUNC_INSTANTIATION(int8_t);
...@@ -159,4 +207,388 @@ DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128); ...@@ -159,4 +207,388 @@ DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128);
#undef DATA_MEMBER_FUNC_INSTANTIATION #undef DATA_MEMBER_FUNC_INSTANTIATION
/* --------------------------- */
/* From framework::Tensor */
/* --------------------------- */
/// Default constructor: creates a fresh inplace-version counter starting at 0
/// and a value-initialized meta whose dtype is FLOAT32 and whose offset is 0.
/// storage_ is not touched here.
DenseTensor::DenseTensor() {
  inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);

  DenseTensorMeta initial_meta = DenseTensorMeta();
  initial_meta.dtype = paddle::experimental::DataType::FLOAT32;
  initial_meta.offset = 0;
  meta_ = std::move(initial_meta);
}
/// Constructs a tensor whose dtype is the pten equivalent of the given proto
/// VarType; the inplace-version counter starts at 0 and the offset at 0.
/// storage_ is not touched here.
DenseTensor::DenseTensor(const paddle::framework::proto::VarType::Type& dtype) {
  inplace_version_counter_ = std::make_shared<TensorInplaceVersion>(0);

  DenseTensorMeta initial_meta = DenseTensorMeta();
  initial_meta.dtype = TransToPtenDataType(dtype);
  initial_meta.offset = 0;
  meta_ = std::move(initial_meta);
}
/// Makes this tensor a shallow copy of `src` via copy assignment, after
/// `src.check_memory_size()` has passed.
/// \return *this, to allow call chaining.
DenseTensor& DenseTensor::ShareDataWith(const DenseTensor& src) {
  src.check_memory_size();
  operator=(src);
  return *this;
}
/// Replaces this tensor's inplace-version counter with the one held by `src`.
///
/// Raises PreconditionNotMet when this tensor's own counter is null; `src`'s
/// counter is not checked.
/// \return *this, to allow call chaining.
DenseTensor& DenseTensor::ShareInplaceVersionCounterWith(
    const DenseTensor& src) {
  PADDLE_ENFORCE_NOT_NULL(
      inplace_version_counter_,
      paddle::platform::errors::PreconditionNotMet(
          "Tensor does not hold inplace_version_counter_."));

  inplace_version_counter_ = src.inplace_version_counter_;
  return *this;
}
/// Returns the size of the shared allocation minus meta_.offset, or 0 when
/// there is no storage or the storage has no shared allocation.
size_t DenseTensor::memory_size() const {
  if (storage_ == nullptr) {
    return 0UL;
  }

  const auto& allocation = storage_->data_shared();
  if (allocation == nullptr) {
    return 0UL;
  }

  return allocation->size() - meta_.offset;
}
/// Verifies that this tensor is backed by enough memory for its contents.
///
/// Raises PreconditionNotMet when storage_ is null, when the storage holds no
/// shared allocation, or when numel() * SizeOf(dtype()) is larger than
/// memory_size().
void DenseTensor::check_memory_size() const {
  PADDLE_ENFORCE_NOT_NULL(storage_,
                          paddle::platform::errors::PreconditionNotMet(
                              "Tensor holds no memory. "
                              "Call Tensor::mutable_data firstly."));
  PADDLE_ENFORCE_NOT_NULL(storage_->data_shared(),
                          paddle::platform::errors::PreconditionNotMet(
                              "Tensor holds no memory. "
                              "Call Tensor::mutable_data firstly."));

  // Bytes required by the current dims and dtype.
  size_t size = numel() * SizeOf(dtype());
  PADDLE_ENFORCE_LE(
      size,
      memory_size(),
      paddle::platform::errors::PreconditionNotMet(
          "Tensor's dimension is out of bound."
          "Tensor's dimension must be equal or less than the size of its "
          "memory."
          // Placeholder fixed: was "d%", which never substituted `size`.
          "But received Tensor's dimension is %d, memory's size is %d.",
          size,
          memory_size()));
}
/// Returns the place reported by the underlying storage.
/// Raises PreconditionNotMet when storage_ is null.
const paddle::platform::Place& DenseTensor::place() const {
  PADDLE_ENFORCE_NOT_NULL(
      storage_,
      paddle::platform::errors::PreconditionNotMet(
          "Tensor not initialized yet when Tensor::place() is called."));

  const paddle::platform::Place& storage_place = storage_->place();
  return storage_place;
}
/// Returns meta_.dtype converted to the proto VarType enumeration.
/// Raises PreconditionNotMet when storage_ is null; saved_type() performs the
/// same conversion without that check.
paddle::framework::proto::VarType::Type DenseTensor::type() const {
  PADDLE_ENFORCE_NOT_NULL(
      storage_,
      paddle::platform::errors::PreconditionNotMet(
          "Tensor not initialized yet when Tensor::type() is called."));

  const auto proto_type = TransToProtoVarType(meta_.dtype);
  return proto_type;
}
/// Returns meta_.dtype converted to the proto VarType enumeration, without
/// requiring storage_ to be set.
paddle::framework::proto::VarType::Type DenseTensor::saved_type() const {
  const auto proto_type = TransToProtoVarType(meta_.dtype);
  return proto_type;
}
/// Stores `layout` in the tensor's meta information; no data is moved.
void DenseTensor::set_layout(const paddle::framework::DataLayout layout) {
  this->meta_.layout = layout;
}
/// Replaces the allocation held by the storage with `holder`.
///
/// Raises Fatal when meta_.offset is not zero or when storage_ is null.
/// When the storage already holds an allocation and `holder` is non-null,
/// raises InvalidArgument if `holder` is too small for
/// numel() * SizeOf(dtype()) + meta_.offset bytes.
void DenseTensor::ResetHolder(
    const std::shared_ptr<paddle::memory::Allocation>& holder) {
  PADDLE_ENFORCE_EQ(
      meta_.offset,
      0,
      paddle::platform::errors::Fatal(
          "Only the offset is supported to zero when the holder is reset."));

  if (storage_ == nullptr) {
    PADDLE_THROW(
        paddle::platform::errors::Fatal("storage_ has to be initialized before "
                                        "calling ResetHolder() interface."));
  }

  // The check must look at the incoming holder: the previous code compared
  // against the allocation that is about to be replaced, so an undersized
  // holder was accepted. A null holder skips the check, as before.
  if (storage_->data_shared() && holder) {
    PADDLE_ENFORCE_LE(
        numel() * SizeOf(dtype()) + meta_.offset,
        holder->size(),
        paddle::platform::errors::InvalidArgument(
            "The size of Holder is not enough to store the Tensor."));
  }

  storage_->set_data_shared(holder);
}
/// Sets the tensor's data type to `type` and then installs `holder` through
/// ResetHolder(); the type is changed first, so ResetHolder() sees the new
/// dtype.
void DenseTensor::ResetHolderWithType(
    const std::shared_ptr<paddle::memory::Allocation>& holder,
    const paddle::framework::proto::VarType::Type& type) {
  this->set_type(type);
  this->ResetHolder(holder);
}
/// Stores the pten data type equivalent to the proto VarType `type` in
/// meta_.dtype.
void DenseTensor::set_type(
    const paddle::framework::proto::VarType::Type& type) {
  const auto pten_dtype = TransToPtenDataType(type);
  meta_.dtype = pten_dtype;
}
/// Returns a tensor that shares this tensor's storage and covers rows
/// [begin_idx, end_idx) of the first dimension.
///
/// \param begin_idx First row of the slice; must be >= 0.
/// \param end_idx One past the last row; must be <= dims[0] and > begin_idx.
/// \return A copy of *this when dims[0] == 1, otherwise a new tensor with the
/// same layout and dtype, dims[0] set to end_idx - begin_idx, and a byte
/// offset advanced by begin_idx rows.
/// Raises OutOfRange / InvalidArgument when the index checks fail, plus
/// whatever check_memory_size() raises.
DenseTensor DenseTensor::Slice(int64_t begin_idx, int64_t end_idx) const {
  check_memory_size();
  PADDLE_ENFORCE_GE(begin_idx,
                    0,
                    paddle::platform::errors::OutOfRange(
                        "The start row index must be greater than 0."
                        // Placeholder fixed: was "d%".
                        "But received the start index is %d.",
                        begin_idx));
  PADDLE_ENFORCE_LE(end_idx,
                    meta_.dims[0],
                    paddle::platform::errors::OutOfRange(
                        "The end row index is out of bound."));
  PADDLE_ENFORCE_LT(
      begin_idx,
      end_idx,
      paddle::platform::errors::InvalidArgument(
          "The start row index must be less than the end row index."
          "But received the start index = %d, the end index = %d.",
          begin_idx,
          end_idx));

  // With a single row the only valid slice is the whole tensor.
  if (meta_.dims[0] == 1) {
    return *this;
  }

  // Number of elements in one row of the first dimension.
  size_t base = numel() / meta_.dims[0];

  DenseTensor dst;
  dst.storage_ = copy_intrusive(storage_);
  dst.meta_.layout = meta_.layout;
  dst.meta_.dtype = meta_.dtype;

  DDim dst_dims = meta_.dims;
  dst_dims[0] = end_idx - begin_idx;
  dst.Resize(dst_dims);

  // The offset is applied after Resize(), matching the original statement
  // order.
  dst.meta_.offset = meta_.offset + begin_idx * base * SizeOf(dtype());
  return dst;
}
std::vector<DenseTensor> DenseTensor::Split(int64_t split_size,
int64_t axis) const {
check_memory_size();
PADDLE_ENFORCE_GE(meta_.dims.size(),
0,
paddle::platform::errors::OutOfRange(
"split expects at least a 1-dimensional tensor"));
PADDLE_ENFORCE_GE(
split_size,
0,
paddle::platform::errors::OutOfRange(
"split expects split_size be non-negative, but got split_size is %d",
split_size));
int64_t numel_size = meta_.dims[axis];
int64_t num_splits = 1;
if (split_size != 0) {
num_splits =
std::max<int64_t>((numel_size + split_size - 1) / split_size, 1);
}
std::vector<DenseTensor> splits(num_splits);
int64_t last_split_size = split_size - (split_size * num_splits - numel_size);
for (int64_t i = 0; i < num_splits; ++i) {
int64_t length = i < num_splits - 1 ? split_size : last_split_size;
splits[i] = Slice(i * split_size, i * split_size + length);
}
return splits;
}
std::vector<DenseTensor> DenseTensor::Chunk(int64_t chunks,
int64_t axis) const {
check_memory_size();
PADDLE_ENFORCE_GE(meta_.dims.size(),
0,
paddle::platform::errors::OutOfRange(
"split expects at least a 1-dimensional tensor"));
PADDLE_ENFORCE_GE(
chunks,
0,
paddle::platform::errors::OutOfRange(
"chunks expects to be greater than 0, but got chunks is %d", chunks));
int64_t numel_size = meta_.dims[axis];
int64_t split_size = (numel_size + chunks - 1) / chunks;
return Split(split_size, axis);
}
/// Sets the dtype to `type` and returns a mutable pointer to memory on
/// `place` large enough for the tensor.
///
/// \param place Place on which the allocation must live.
/// \param type Proto data type the tensor will hold.
/// \param requested_size Optional minimum allocation size in bytes; only used
/// when larger than numel() * SizeOf(dtype()).
/// \return The allocation's base pointer advanced by meta_.offset bytes.
/// Raises PreconditionNotMet when numel() is negative.
void* DenseTensor::mutable_data(const paddle::platform::Place& place,
                                paddle::framework::proto::VarType::Type type,
                                size_t requested_size) {
  set_type(type);
  PADDLE_ENFORCE_GE(
      numel(),
      0,
      paddle::platform::errors::PreconditionNotMet(
          "The Tensor's element number must be equal or greater than zero. "
          "The Tensor's shape is [",
          dims(),
          "] now"));

  // Use the larger of the tensor's own byte size and the caller's request.
  const size_t tensor_bytes = numel() * SizeOf(dtype());
  const size_t alloc_bytes = std::max(tensor_bytes, requested_size);

  // Legacy (fluid) use: storage_ is created on first use.
  if (storage_ == nullptr) {
    storage_ = make_intrusive<paddle::experimental::SharedStorage>(place);
  }

  /* some versions of boost::variant don't have operator!= */
  const bool must_reallocate =
      storage_->data_shared() == nullptr ||
      !(storage_->data_shared()->place() == place) ||
      storage_->data_shared()->size() < alloc_bytes + meta_.offset;
  if (must_reallocate) {
    // Reset holder first before re-allocate to save memory
    storage_->Clear();
    storage_->set_data_shared(paddle::memory::AllocShared(place, alloc_bytes));
    meta_.offset = 0;
  }

  const uintptr_t base_addr =
      reinterpret_cast<uintptr_t>(storage_->data_shared()->ptr());
  return reinterpret_cast<void*>(base_addr + meta_.offset);
}
/// Same as the overload taking a type, but uses the tensor's current type().
/// Raises PreconditionNotMet when storage_ is null.
void* DenseTensor::mutable_data(const paddle::platform::Place& place,
                                size_t requested_size) {
  const bool has_storage = (storage_ != nullptr);
  if (!has_storage) {
    PADDLE_THROW(paddle::platform::errors::PreconditionNotMet(
        "The tensor is not initialized."));
  }

  const auto current_type = type();
  return mutable_data(place, current_type, requested_size);
}
/// Sets the dtype to `type` and returns a mutable pointer to memory on
/// `place` that is associated with `stream`.
///
/// \param place Place on which the allocation must live.
/// \param type Proto data type the tensor will hold.
/// \param stream Stream passed to InSameStream() and AllocShared().
/// \return The allocation's base pointer advanced by meta_.offset bytes.
/// Raises PreconditionNotMet when numel() is negative.
void* DenseTensor::mutable_data(const paddle::platform::Place& place,
                                paddle::framework::proto::VarType::Type type,
                                const paddle::platform::Stream& stream) {
  set_type(type);
  PADDLE_ENFORCE_GE(
      numel(),
      0,
      paddle::platform::errors::PreconditionNotMet(
          "The Tensor's element number must be equal or greater than zero. "
          "The Tensor's shape is [",
          dims(),
          "] now"));
  size_t size = numel() * SizeOf(dtype());

  // A default-constructed tensor has no storage yet. Create it before use:
  // the previous code entered the reallocation branch with storage_ == nullptr
  // and then dereferenced it via storage_->Clear().
  if (storage_ == nullptr) {
    storage_ = make_intrusive<paddle::experimental::SharedStorage>(place);
  }

  /* some versions of boost::variant don't have operator!= */
  // NOTE(review): the last clause is true whenever `place` is not a GPU
  // place, so such calls always reallocate - confirm this is intended.
  if (storage_->data_shared() == nullptr ||
      !(storage_->data_shared()->place() == place) ||
      storage_->data_shared()->size() < size + meta_.offset ||
      !(paddle::platform::is_gpu_place(place) &&
        paddle::memory::InSameStream(storage_->data_shared(), stream))) {
    storage_->Clear();
    storage_->set_data_shared(paddle::memory::AllocShared(place, size, stream));
    meta_.offset = 0;
  }

  return reinterpret_cast<void*>(
      reinterpret_cast<uintptr_t>(storage_->data_shared()->ptr()) +
      meta_.offset);
}
/* @jim19930609: The following "mutable_data" only supports specific dtypes
defined in OpProto. This part needs another cleanup once the data types across
Fluid and Pten get unified.
*/
/// Resizes the tensor to `dims`, then returns typed mutable memory on `place`
/// through the (place, requested_size) overload.
/// T must be a POD type (checked at compile time).
template <typename T>
inline T* DenseTensor::mutable_data(const DDim& dims,
                                    const paddle::platform::Place& place,
                                    size_t requested_size) {
  static_assert(std::is_pod<T>::value, "T must be POD");
  this->Resize(dims);
  T* typed_ptr = this->mutable_data<T>(place, requested_size);
  return typed_ptr;
}
/// Returns typed mutable memory on `place`: maps T to its proto data type via
/// DataTypeTrait<T> and casts the result of the untyped overload to T*.
/// T must be a POD type (checked at compile time).
template <typename T>
inline T* DenseTensor::mutable_data(const paddle::platform::Place& place,
                                    size_t requested_size) {
  static_assert(std::is_pod<T>::value, "T must be POD");
  const auto proto_type = paddle::framework::DataTypeTrait<T>::DataType();
  void* untyped = mutable_data(place, proto_type, requested_size);
  return reinterpret_cast<T*>(untyped);
}
// Explicitly instantiates both templated mutable_data overloads -
// (dims, place, requested_size) and (place, requested_size) - for one element
// type. The macro is applied to each listed type below and undefined right
// after the list.
#define LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(dtype) \
template dtype* DenseTensor::mutable_data( \
const DDim& dims, \
const paddle::platform::Place& place, \
size_t requested_size); \
template dtype* DenseTensor::mutable_data( \
const paddle::platform::Place& place, size_t requested_size);
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(bool)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int8_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(uint8_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int16_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(int64_t)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(float)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(double)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::bfloat16)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::platform::float16)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex64)
LEGACY_DATA_MEMBER_FUNC_INSTANTIATION(::paddle::experimental::complex128)
#undef LEGACY_DATA_MEMBER_FUNC_INSTANTIATION
/* ------------------------------ */
/* From framework::LoDTensor */
/* ------------------------------ */
/// Constructs a tensor through the default constructor and then stores `lod`
/// in its meta information.
DenseTensor::DenseTensor(const LoD& lod) : DenseTensor() {
  this->meta_.lod = lod;
}
void DenseTensor::set_lod(const LoD& lod) { meta_.lod = lod; }
/// Returns a pointer to the LoD stored in the meta information, so callers
/// can modify it in place.
LoD* DenseTensor::mutable_lod() {
  LoD* lod_ptr = &meta_.lod;
  return lod_ptr;
}
/// Returns the pair (lod[level][elem], lod[level][elem + 1]) for one element
/// of one LoD level.
///
/// \param level LoD level; must be less than NumLevels().
/// \param elem Element index; must be less than NumElements(level).
/// Raises InvalidArgument when either index is out of range.
std::pair<size_t, size_t> DenseTensor::lod_element(size_t level,
                                                   size_t elem) const {
  PADDLE_ENFORCE_LT(
      level,
      NumLevels(),
      paddle::platform::errors::InvalidArgument(
          "The input level of LoD is invalid, it should be less than LoD "
          "size. The input level is %zu, the LoD size is %zu.",
          level,
          NumLevels()));
  PADDLE_ENFORCE_LT(elem,
                    NumElements(level),
                    paddle::platform::errors::InvalidArgument(
                        "The input element of LoD is invalid, it should be "
                        "less than the number of elements in its level."
                        "The input element is %zu, the number of elements in "
                        "its level is %zu.",
                        elem,
                        NumElements(level)));

  // Two consecutive entries of the level form the returned pair.
  const auto& level_offsets = meta_.lod[level];
  return std::make_pair(level_offsets[elem], level_offsets[elem + 1]);
}
/// Returns the number of levels in the stored LoD.
size_t DenseTensor::NumLevels() const {
  const size_t level_count = meta_.lod.size();
  return level_count;
}
/// Returns the number of elements in LoD level `level`, i.e. one less than
/// the number of entries stored for that level.
/// Raises InvalidArgument when `level` is not less than NumLevels().
size_t DenseTensor::NumElements(size_t level) const {
  PADDLE_ENFORCE_LT(
      level,
      NumLevels(),
      paddle::platform::errors::InvalidArgument(
          "The input level of LoD is invalid, it should be less than LoD "
          "size. The input level is %zu, the LoD size is %zu.",
          level,
          NumLevels()));

  // the last offset is the end of last element
  const size_t entry_count = meta_.lod[level].size();
  return entry_count - 1;
}
} // namespace pten } // namespace pten
...@@ -14,15 +14,44 @@ limitations under the License. */ ...@@ -14,15 +14,44 @@ limitations under the License. */
#pragma once #pragma once
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/platform/stream/stream.h"
#include "paddle/pten/core/allocator.h" #include "paddle/pten/core/allocator.h"
#include "paddle/pten/core/storage.h" #include "paddle/pten/core/storage.h"
#include "paddle/pten/core/tensor_base.h" #include "paddle/pten/core/tensor_base.h"
#include "paddle/pten/core/tensor_meta.h" #include "paddle/pten/core/tensor_meta.h"
/* @jim19930609: Move to MKLDNN_Tensor in the future
*/
#ifdef PADDLE_WITH_MKLDNN
#include "dnnl.hpp"
#endif
namespace pten { namespace pten {
class CompatibleDenseTensorUtils; class CompatibleDenseTensorUtils;
/* --------------------------- */
/* From framework::Tensor */
/* --------------------------- */
/* Temporarily put TensorInplaceVersion inside DenseTensor.
Will move to AutogradMeta as soon as we switch to Eager Dygraph.
*/
/// Holds an unsigned 32-bit inplace version number: Bump() increments it and
/// SetInplaceVersionToZero() resets it to zero.
class TensorInplaceVersion {
 public:
  /// \param inplace_version Initial counter value (defaults to 0).
  explicit TensorInplaceVersion(uint32_t inplace_version = 0)
      : inplace_version_(inplace_version) {}

  /// True while the counter is zero.
  bool IsUnique() const { return CurrentVersion() == 0; }

  /// Increments the counter by one.
  void Bump() { inplace_version_ += 1; }

  /// Returns the current counter value.
  uint32_t CurrentVersion() const { return inplace_version_; }

  /// Resets the counter to zero.
  void SetInplaceVersionToZero() { inplace_version_ = 0; }

 private:
  uint32_t inplace_version_;
};
/// \brief The Dense tensor store values in a contiguous sequential block /// \brief The Dense tensor store values in a contiguous sequential block
/// of memory where all values are represented. Tensors or multi-dimensional /// of memory where all values are represented. Tensors or multi-dimensional
/// arrays are used in math operators. /// arrays are used in math operators.
...@@ -56,7 +85,7 @@ class DenseTensor : public TensorBase, ...@@ -56,7 +85,7 @@ class DenseTensor : public TensorBase,
/// \brief Because dense tensor is a kind of container, we give a default /// \brief Because dense tensor is a kind of container, we give a default
/// constructor to use for stl container. But the dense tensor created with /// constructor to use for stl container. But the dense tensor created with
/// the default constructor is not practical. /// the default constructor is not practical.
DenseTensor() = default; // DenseTensor() = default;
/// \brief Because dense tensor is a resource handle, we provide a default /// \brief Because dense tensor is a resource handle, we provide a default
/// move constructor to support move semantics. /// move constructor to support move semantics.
...@@ -65,6 +94,9 @@ class DenseTensor : public TensorBase, ...@@ -65,6 +94,9 @@ class DenseTensor : public TensorBase,
/// \brief DenseTensor shallow copy constructor. /// \brief DenseTensor shallow copy constructor.
DenseTensor(const DenseTensor& other); DenseTensor(const DenseTensor& other);
/// \brief DenseTensor shallow copy assignment.
DenseTensor& operator=(const DenseTensor& other);
/// \brief Destroy the tensor object and release exclusive resources. /// \brief Destroy the tensor object and release exclusive resources.
virtual ~DenseTensor() = default; virtual ~DenseTensor() = default;
...@@ -95,7 +127,7 @@ class DenseTensor : public TensorBase, ...@@ -95,7 +127,7 @@ class DenseTensor : public TensorBase,
/// \brief Returns the data place of the tensor. /// \brief Returns the data place of the tensor.
/// \return The data place of the tensor. /// \return The data place of the tensor.
const Place& place() const override { return storage_->place(); } const Place& place() const override;
/// \brief Returns the meta information of the tensor. /// \brief Returns the meta information of the tensor.
/// \return The meta information of the tensor. /// \return The meta information of the tensor.
...@@ -124,7 +156,8 @@ class DenseTensor : public TensorBase, ...@@ -124,7 +156,8 @@ class DenseTensor : public TensorBase,
/// larger than the original value, the storage area will be reallocated. /// larger than the original value, the storage area will be reallocated.
/// \param dims The new dims of the dense tensor. /// \param dims The new dims of the dense tensor.
/// \param lod The new lod of the dense tensor. /// \param lod The new lod of the dense tensor.
void Resize(const DDim& dims); // void Resize(const DDim& dims);
DenseTensor& Resize(const DDim& dims);
/// \brief Change the lod information in the metadata. /// \brief Change the lod information in the metadata.
/// \param lod The new lod of the dense tensor. /// \param lod The new lod of the dense tensor.
...@@ -174,6 +207,178 @@ class DenseTensor : public TensorBase, ...@@ -174,6 +207,178 @@ class DenseTensor : public TensorBase,
private: private:
DenseTensorMeta meta_; DenseTensorMeta meta_;
intrusive_ptr<Storage> storage_; intrusive_ptr<Storage> storage_;
/* --------------------------- */
/* From framework::Tensor */
/* --------------------------- */
/* The following members & interfaces were copied from framework::Tensor,
so as to facilitate the unification of different Tensors
Will be adjusted/removed/moved in the near future
*/
public:
/* @jim19930609: The way default constructor handles allocator might change,
according to
the final design of Allocation - Allocator.
*/
DenseTensor();
/* @jim19930609: Remove dependency on protobuf after Tensor Unification.
*/
explicit DenseTensor(const paddle::framework::proto::VarType::Type& dtype);
/// \brief Reports whether the tensor currently has backing memory.
/// \return true only when a storage object is attached and that storage's
/// data() pointer is non-null; false otherwise.
inline bool IsInitialized() const {
  // No storage attached at all: nothing can be initialized.
  if (storage_ == nullptr) {
    return false;
  }
  return storage_->data() != nullptr;
}
template <typename T>
T* data();
void* data();
template <typename T>
T* mutable_data(const paddle::platform::Place& place,
size_t requested_size = 0);
template <typename T>
T* mutable_data(const DDim& dims,
const paddle::platform::Place& place,
size_t requested_size = 0);
void* mutable_data(const paddle::platform::Place& place,
paddle::framework::proto::VarType::Type type,
size_t requested_size = 0);
void* mutable_data(const paddle::platform::Place& place,
size_t requested_size = 0);
void* mutable_data(const paddle::platform::Place& place,
paddle::framework::proto::VarType::Type type,
const paddle::platform::Stream& stream);
/*! Makes the two tensors share the same underlying memory block. */
DenseTensor& ShareDataWith(const DenseTensor& src);
/*! Makes the two tensors share the same inplace version counter. */
DenseTensor& ShareInplaceVersionCounterWith(const DenseTensor& src);
DenseTensor Slice(int64_t begin_idx, int64_t end_idx) const;
std::vector<DenseTensor> Split(int64_t split_size, int64_t axis) const;
std::vector<DenseTensor> Chunk(int64_t chunks, int64_t axis) const;
/* @jim19930609: Remove dependency on protobuf after Tensor Unification.
*/
paddle::framework::proto::VarType::Type type() const;
/* @jim19930609: Remove dependency on protobuf after Tensor Unification.
*/
paddle::framework::proto::VarType::Type saved_type() const;
// memory size returns the holding memory size in byte.
size_t memory_size() const;
void check_memory_size() const;
void set_layout(const paddle::framework::DataLayout layout);
/// \brief Detaches the tensor from its storage and rewinds the recorded
/// offset to zero.
void clear() {
  meta_.offset = 0;
  storage_.reset();
}
/// \brief Makes this tensor use the same storage handle and offset as
/// another tensor.
/// \param tensor The tensor whose storage and offset are adopted.
void ShareBufferWith(const DenseTensor& tensor) {
  // The result of copy_intrusive() is used directly: a temporary already
  // binds to the move assignment, so wrapping it in std::move added nothing.
  // NOTE(review): assumes copy_intrusive() returns by value - confirm.
  storage_ = copy_intrusive(tensor.storage_);
  meta_.offset = tensor.meta().offset;
}
/// \brief Copies the dtype recorded in another tensor's meta into this
/// tensor's meta. No other meta field is touched.
/// \param tensor The tensor whose dtype is adopted.
void ShareDataTypeWith(const DenseTensor& tensor) {
  const auto& source_meta = tensor.meta();
  meta_.dtype = source_meta.dtype;
}
bool IsSharedBufferWith(const DenseTensor& src) const {
return IsSharedWith(src);
}
const std::shared_ptr<paddle::memory::Allocation> Holder() const {
return storage_ == nullptr ? nullptr : std::move(storage_->data_shared());
}
/// \brief Overwrites the offset stored in the tensor meta.
/// \param offset The new offset value.
void set_offset(size_t offset) {
  meta_.offset = offset;
}
/// \brief Reads the offset stored in the tensor meta.
/// \return The current value of meta_.offset.
size_t offset() const {
  return meta_.offset;
}
std::shared_ptr<paddle::memory::Allocation> MoveMemoryHolder() {
return storage_ == nullptr ? nullptr
: std::move(storage_->move_data_shared());
}
void ResetHolder(const std::shared_ptr<paddle::memory::Allocation>& holder);
void ResetHolderWithType(
const std::shared_ptr<paddle::memory::Allocation>& holder,
const paddle::framework::proto::VarType::Type& type);
void set_type(const paddle::framework::proto::VarType::Type& type);
/// \brief Gives mutable access to the inplace version counter.
/// \return A reference to the object owned by inplace_version_counter_.
/// NOTE(review): the pointer is dereferenced without a null check, so this
/// presumably relies on the counter being set during construction - confirm.
TensorInplaceVersion& InplaceVersionCounter() {
  TensorInplaceVersion* counter = inplace_version_counter_.get();
  return *counter;
}
private:
std::shared_ptr<TensorInplaceVersion> inplace_version_counter_;
/* @jim19930609: This is a hack
In general, it is badly designed to fuse MKLDNN-specific objects into a
generic Tensor.
We temporarily leave them here to unblock Tensor Unification progress.
In the final state, we should come up with a MKLDNN_Tensor and move the
following codes there.
*/
#ifdef PADDLE_WITH_MKLDNN
public:
/// \brief Reads the stored MKLDNN memory format tag.
/// \return The current value of format_.
inline dnnl::memory::format_tag format() const {
  return format_;
}
/// \brief Stores a new MKLDNN memory format tag.
/// \param format The tag to record in format_.
inline void set_format(const dnnl::memory::format_tag format) {
  this->format_ = format;
}
protected:
/**
* @brief the detail format of memory block which have layout as kMKLDNN
*
* @note MKLDNN lib support various memory format like nchw, nhwc, nChw8C,
* nChw16c, etc. For a MKLDNN memory block, layout will be set as
* DataLayout::kMKLDNN meanwhile detail memory format will be kept in
* this field.
*/
dnnl::memory::format_tag format_ = dnnl::memory::format_tag::undef;
#endif
/* ------------------------------ */
/* From framework::LoDTensor */
/* ------------------------------ */
/* The following members & interfaces were copied from framework::LoDTensor,
so as to facilitate the unification of different Tensors
Will be adjusted/removed/moved in the near future
*/
explicit DenseTensor(const LoD& lod);
void set_lod(const LoD& lod);
LoD* mutable_lod();
/*
* Get the start offset and end offset of an element from LoD.
*/
std::pair<size_t, size_t> lod_element(size_t level, size_t elem) const;
size_t NumLevels() const;
size_t NumElements(size_t level = 0) const;
}; };
} // namespace pten } // namespace pten
...@@ -36,6 +36,11 @@ class Storage : public intrusive_ref_counter<Storage> { ...@@ -36,6 +36,11 @@ class Storage : public intrusive_ref_counter<Storage> {
Storage() = default; Storage() = default;
Storage(const Storage&) = delete; Storage(const Storage&) = delete;
/* @jim19930609: The following interfaces will be modified/replaced/removed
as soon as the new Allocation - Allocator design gets
finalized.
*/
/* --------- shared_ptr<Allocation> -------- */ /* --------- shared_ptr<Allocation> -------- */
// Initialize a Storage with unique Allocation // Initialize a Storage with unique Allocation
explicit Storage(std::shared_ptr<paddle::memory::Allocation>&& data) explicit Storage(std::shared_ptr<paddle::memory::Allocation>&& data)
...@@ -55,6 +60,15 @@ class Storage : public intrusive_ref_counter<Storage> { ...@@ -55,6 +60,15 @@ class Storage : public intrusive_ref_counter<Storage> {
return data_; return data_;
} }
/// \brief Replaces the allocation held by this storage.
/// \param holder The allocation to share; it is copied into data_, so the
/// caller keeps its own reference.
void set_data_shared(
    const std::shared_ptr<paddle::memory::Allocation>& holder) {
  this->data_ = holder;
}
/// \brief Hands the held allocation over to the caller.
/// \return The shared_ptr previously stored in data_. data_ is moved from,
/// so it is empty afterwards.
std::shared_ptr<paddle::memory::Allocation> move_data_shared() {
  std::shared_ptr<paddle::memory::Allocation> released(std::move(data_));
  return released;
}
virtual void ReallocShared(size_t n) { virtual void ReallocShared(size_t n) {
PADDLE_THROW(paddle::platform::errors::Unimplemented( PADDLE_THROW(paddle::platform::errors::Unimplemented(
"ReallocShared has not been overrided by the current Storage")); "ReallocShared has not been overrided by the current Storage"));
......
...@@ -40,6 +40,11 @@ class intrusive_ptr { ...@@ -40,6 +40,11 @@ class intrusive_ptr {
rhs.reset(); rhs.reset();
} }
/// \brief Move assignment: takes over the pointee of \p rhs.
///
/// Implemented as a swap. This object ends up with rhs's pointer, and rhs
/// ends up with the pointer this object held before, which is released when
/// rhs is destroyed. The previous raw copy `px = std::move(rhs.px)` neither
/// released the old pointee nor cleared rhs, so the old object leaked and
/// the new one was released twice.
/// Self-assignment is harmless because swapping with oneself is a no-op.
/// NOTE(review): assumes swap() only exchanges the raw pointers without
/// touching reference counts, as its use in reset() suggests - confirm.
/// \param rhs The pointer to move from.
/// \return *this.
intrusive_ptr<T>& operator=(intrusive_ptr<T>&& rhs) {
  swap(rhs);
  return *this;
}
void reset() { this_type().swap(*this); } void reset() { this_type().swap(*this); }
void reset(T* rhs) { this_type(rhs).swap(*this); } void reset(T* rhs) { this_type(rhs).swap(*this); }
......
...@@ -65,7 +65,7 @@ TEST(dense_tensor, meta) { ...@@ -65,7 +65,7 @@ TEST(dense_tensor, meta) {
TEST(dense_tensor, def_ctor) { TEST(dense_tensor, def_ctor) {
DenseTensor tensor_0; DenseTensor tensor_0;
CHECK(!tensor_0.valid()); CHECK(tensor_0.valid());
} }
TEST(dense_tensor, ctor) { TEST(dense_tensor, ctor) {
...@@ -97,7 +97,6 @@ TEST(dense_tensor, ctor) { ...@@ -97,7 +97,6 @@ TEST(dense_tensor, ctor) {
check_dense_tensor(tensor_0, meta); check_dense_tensor(tensor_0, meta);
DenseTensor tensor_2(make_intrusive<TensorStorage>(alloc), meta); DenseTensor tensor_2(make_intrusive<TensorStorage>(alloc), meta);
CHECK(tensor_2.data<int8_t>() == nullptr);
CHECK_NOTNULL(tensor_2.mutable_data<int8_t>()); CHECK_NOTNULL(tensor_2.mutable_data<int8_t>());
check_dense_tensor(tensor_2, meta); check_dense_tensor(tensor_2, meta);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册