未验证 提交 a504ff3f 编写于 作者: C Chen Weihang 提交者: GitHub

[PTen] Remove offset in storage (#38472)

* remove offset in storage

* revert api change

* fix custom op slice bug

* fix mutable_data error
上级 3f6229c6
......@@ -83,7 +83,7 @@ void ScaleAPI(const egr::EagerTensor& x, float scale, float bias,
SizeOf(dense_tensor->dtype());
auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size), 0),
paddle::memory::Alloc(place, bytes_size)),
std::move(tensor_meta));
// Handle Device Context
const paddle::platform::Place& expected_kernel_place =
......
......@@ -41,7 +41,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size), 0),
paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta));
float* t_ptr = t_dense->mutable_data<float>();
......
......@@ -42,7 +42,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size), 0),
paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta));
float* t_ptr = t_dense->mutable_data<float>();
......
......@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
"Tensors.",
vec_true_outs.size(), outs.size()));
for (size_t j = 0; j < vec_true_outs.size(); ++j) {
experimental::MovesSharedStorage(
experimental::SharesStorage(
std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(j).impl())
.get(),
vec_true_outs.at(j));
}
} else {
auto* true_out = ctx.Output<Tensor>(out_name);
experimental::MovesSharedStorage(
experimental::SharesStorage(
std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(i).impl())
.get(),
true_out);
......
......@@ -273,6 +273,7 @@ class Tensor {
const std::shared_ptr<memory::Allocation>& Holder() const { return holder_; }
size_t offset() const { return offset_; }
void set_offset(size_t offset) { offset_ = offset; }
std::shared_ptr<memory::Allocation> MoveMemoryHolder() {
return std::move(holder_);
......
......@@ -456,8 +456,7 @@ class ReshapeKernel {
// non-inplace need move all result from pt_out to out, inplace need set
// result dims.
if (in != out) {
paddle::experimental::MovesSharedStorage(pt_out,
static_cast<Tensor *>(out));
paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
} else {
out->Resize(pt_out->dims());
}
......
......@@ -304,7 +304,7 @@ class PADDLE_API Tensor final {
* The index number begins from begin_idx + 1.
* @return Tensor
*/
Tensor slice(const int64_t begin_idx, const int64_t end_idx) const;
Tensor slice(int64_t begin_idx, int64_t end_idx) const;
/**
* @brief Return the implementation of current Tensor.
......
......@@ -253,11 +253,11 @@ template PADDLE_API paddle::platform::float16 *
Tensor::data<paddle::platform::float16>();
// TODO(chenweihang): replace slice impl by API
Tensor Tensor::slice(const int64_t begin_idx, const int64_t end_idx) const {
Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const {
if (is_dense_tensor()) {
return Tensor(std::make_shared<pten::DenseTensor>(
std::move(pten::CompatibleDenseTensorUtils::Slice(
std::dynamic_pointer_cast<pten::DenseTensor>(impl_).get(),
*(std::dynamic_pointer_cast<pten::DenseTensor>(impl_).get()),
begin_idx,
end_idx))));
} else {
......
......@@ -37,7 +37,6 @@ class ExternalStorage : public pten::Storage {
void Clear() override {
data_ = nullptr;
size_ = 0;
offset_ = 0;
}
size_t size() const noexcept override { return size_; }
......@@ -57,13 +56,11 @@ class ExternalStorage : public pten::Storage {
class SharedStorage : public pten::Storage {
public:
explicit SharedStorage(
const std::shared_ptr<paddle::memory::Allocation>& allocation,
size_t offset)
const std::shared_ptr<paddle::memory::Allocation>& allocation)
: Storage(allocation) {
CHECK(allocation);
place_ = allocation->place();
size_ = allocation->size();
offset_ = offset;
}
// In order to be compatible with the original Tensor design and execution
......@@ -84,7 +81,6 @@ class SharedStorage : public pten::Storage {
void Clear() override {
data_ = nullptr;
size_ = 0;
offset_ = 0;
}
size_t size() const noexcept override { return size_; }
......@@ -96,12 +92,10 @@ class SharedStorage : public pten::Storage {
}
// Temporary method: For compatible with fluid Tensor and improve performance
void ResetAllocation(std::shared_ptr<paddle::memory::Allocation> allocation,
size_t offset) {
void ResetAllocation(std::shared_ptr<paddle::memory::Allocation> allocation) {
data_ = allocation;
size_ = allocation->size();
place_ = allocation->place();
offset_ = offset;
}
// Temporary method: For compatible with fluid Tensor and improve performance
......
......@@ -33,39 +33,35 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) {
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::Tensor& src) {
VLOG(3) << "MakePtenDenseTensor based Tensor.";
pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()),
src.dims(),
pten::TransToPtenDataLayout(src.layout())};
auto shared_storage =
pten::make_intrusive<SharedStorage>(src.Holder(), src.offset());
pten::TransToPtenDataLayout(src.layout()),
src.offset()};
auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
}
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::LoDTensor& src) {
pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()),
src.dims(),
pten::TransToPtenDataLayout(src.layout())};
SetLoD(&meta.lod, src.lod());
auto shared_storage =
pten::make_intrusive<SharedStorage>(src.Holder(), src.offset());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
auto out =
MakePtenDenseTensor(static_cast<const paddle::framework::Tensor&>(src));
SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
src.lod());
return std::move(out);
}
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::Tensor& tensor,
const pten::TensorArgDef& arg_def) {
const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
pten::DenseTensorMeta meta{arg_def.dtype,
tensor.dims(),
pten::TransToPtenDataLayout(tensor.layout())};
src.dims(),
pten::TransToPtenDataLayout(src.layout()),
src.offset()};
if (tensor.IsInitialized() &&
tensor.place() == pten::TransToFluidPlace(arg_def.backend)) {
auto shared_storage =
pten::make_intrusive<SharedStorage>(tensor.Holder(), tensor.offset());
if (src.IsInitialized() &&
src.place() == pten::TransToFluidPlace(arg_def.backend)) {
auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
} else {
......@@ -77,25 +73,13 @@ std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
}
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::LoDTensor& tensor,
const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def) {
pten::DenseTensorMeta meta{arg_def.dtype,
tensor.dims(),
pten::TransToPtenDataLayout(tensor.layout()),
pten::TransToPtenLoD(tensor.lod())};
if (tensor.IsInitialized() &&
tensor.place() == pten::TransToFluidPlace(arg_def.backend)) {
auto shared_storage =
pten::make_intrusive<SharedStorage>(tensor.Holder(), tensor.offset());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
} else {
return std::make_unique<pten::DenseTensor>(
std::move(pten::make_intrusive<SharedStorage>(
pten::TransToFluidPlace(arg_def.backend))),
std::move(meta));
}
auto out = MakePtenDenseTensor(
static_cast<const paddle::framework::Tensor&>(src), arg_def);
SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
src.lod());
return std::move(out);
}
pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src) {
......@@ -328,23 +312,15 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
std::shared_ptr<paddle::memory::allocation::Allocation> holder(
new TensorStorage(std::move(storage)));
dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype()));
dst->set_offset(src->meta().offset);
}
void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) {
PADDLE_ENFORCE_NOT_NULL(
src,
platform::errors::InvalidArgument(
"The source DenseTensor is nullptr when move storage."));
PADDLE_ENFORCE_NOT_NULL(
dst,
platform::errors::InvalidArgument(
"The destination LoDTensor is nullptr when move storage."));
SetLoD(dst->mutable_lod(), src->lod());
MovesStorage(src, static_cast<paddle::framework::Tensor*>(dst));
SetLoD(dst->mutable_lod(), src->lod());
}
void MovesSharedStorage(pten::DenseTensor* src,
paddle::framework::Tensor* dst) {
void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
PADDLE_ENFORCE_NOT_NULL(
src,
platform::errors::InvalidArgument(
......@@ -358,24 +334,22 @@ void MovesSharedStorage(pten::DenseTensor* src,
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(src));
dst->ResetHolderWithType(storage->GetAllocation(),
pten::TransToProtoVarType(src->dtype()));
dst->set_offset(src->meta().offset);
}
void MovesSharedStorage(pten::DenseTensor* src,
paddle::framework::LoDTensor* dst) {
MovesSharedStorage(src, static_cast<paddle::framework::Tensor*>(dst));
void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) {
SharesStorage(src, static_cast<paddle::framework::Tensor*>(dst));
SetLoD(dst->mutable_lod(), src->lod());
}
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
VLOG(3) << "ReMakePtenDenseTensor based Tensor.";
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims();
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataType&>(meta->dtype) = arg_def.dtype;
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
meta->dtype = pten::TransToPtenDataType(src.type());
meta->layout = pten::TransToPtenDataLayout(src.layout());
meta->offset = src.offset();
auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
......@@ -384,42 +358,30 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized()) {
shared_storage->ResetAllocation(src.Holder(), src.offset());
}
PADDLE_ENFORCE_EQ(src.IsInitialized(),
true,
paddle::platform::errors::InvalidArgument(
"Source Tensor is not initialized."));
shared_storage->ResetAllocation(src.Holder());
}
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
pten::DenseTensor* dst) {
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims();
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataType&>(meta->dtype) = pten::TransToPtenDataType(src.type());
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
PADDLE_ENFORCE_NOT_NULL(
shared_storage,
platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized()) {
shared_storage->ResetAllocation(src.Holder(), src.offset());
}
SetLoD(&meta->lod, src.lod());
ReMakePtenDenseTensor(static_cast<const paddle::framework::Tensor&>(src),
dst);
}
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst) {
void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef.";
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims();
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataType&>(meta->dtype) = pten::TransToPtenDataType(src.type());
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
meta->dtype = arg_def.dtype;
meta->layout = pten::TransToPtenDataLayout(src.layout());
meta->offset = src.offset();
auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
......@@ -428,38 +390,24 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized()) {
shared_storage->ResetAllocation(src.Holder(), src.offset());
}
}
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims();
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataType&>(meta->dtype) = arg_def.dtype;
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
SetLoD(&(meta->lod), src.lod());
auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
PADDLE_ENFORCE_NOT_NULL(
shared_storage,
platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized() &&
src.place() == pten::TransToFluidPlace(arg_def.backend)) {
shared_storage->ResetAllocation(src.Holder(), src.offset());
shared_storage->ResetAllocation(src.Holder());
} else {
shared_storage->ResetAllocationPlace(
pten::TransToFluidPlace(arg_def.backend));
}
}
// LoDTensor overload: copies the source LoD into the destination meta, then
// delegates every remaining field (dims, dtype, layout, offset, storage) to
// the plain-Tensor overload.
void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src,
                                   const pten::TensorArgDef& arg_def,
                                   pten::DenseTensor* dst) {
  SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(dst)->lod),
         src.lod());
  const auto& base_tensor = static_cast<const paddle::framework::Tensor&>(src);
  ReMakePtenDenseTensorByArgDef(base_tensor, arg_def, dst);
}
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
......@@ -475,9 +423,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
if (!platform::is_same_place(tensor.place(), expected_place)) {
framework::LoDTensor tmp_tensor;
framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
ReMakePtenDenseTensor(tmp_tensor, arg_def, dst);
ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
} else {
ReMakePtenDenseTensor(tensor, arg_def, dst);
ReMakePtenDenseTensorByArgDef(tensor, arg_def, dst);
}
} else if (variable.IsType<framework::SelectedRows>()) {
// TODO(chenweihang): now we don't deal with row and height
......@@ -492,9 +440,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
framework::Tensor tmp_tensor;
TensorCopySync(tensor.value(), expected_place, &tmp_tensor);
// TODO(chenweihang): adapt SelectedRows by xiaowei's design
ReMakePtenDenseTensor(tmp_tensor, arg_def, dst);
ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
} else {
ReMakePtenDenseTensor(tensor.value(), arg_def, dst);
ReMakePtenDenseTensorByArgDef(tensor.value(), arg_def, dst);
}
} else {
PADDLE_THROW(platform::errors::Unimplemented(
......@@ -510,12 +458,12 @@ void ReMakePtenDenseTensorFromVar(framework::Variable* variable,
// KernelContext to original tensor
if (variable->template IsType<framework::LoDTensor>()) {
auto* tensor = variable->template GetMutable<framework::LoDTensor>();
ReMakePtenDenseTensor(*tensor, arg_def, dst);
ReMakePtenDenseTensorByArgDef(*tensor, arg_def, dst);
} else if (variable->template IsType<framework::SelectedRows>()) {
auto* tensor = variable->template GetMutable<framework::SelectedRows>();
// TODO(chenweihang): adapt SelectedRows by xiaowei's design,
// here the row and height will lost in output!
ReMakePtenDenseTensor(tensor->value(), arg_def, dst);
ReMakePtenDenseTensorByArgDef(tensor->value(), arg_def, dst);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared output `%s` type now when call pt kernel.",
......
......@@ -58,10 +58,9 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst);
void MovesSharedStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void MovesSharedStorage(pten::DenseTensor* src,
paddle::framework::LoDTensor* dst);
void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst);
/**
* In order to improve the compatibility state performance, some tricky tool
......@@ -72,20 +71,20 @@ void MovesSharedStorage(pten::DenseTensor* src,
* the overhead caused by frequent construction and destruction of the
* DenseTensor.
*/
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
pten::DenseTensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
......
......@@ -48,16 +48,16 @@ class CompatibleDenseTensorUtils {
}
}
static DenseTensor Slice(DenseTensor* tensor,
static DenseTensor Slice(const DenseTensor& tensor,
int64_t begin_idx,
int64_t end_idx) {
size_t bytes = tensor->numel() * SizeOf(tensor->dtype());
PADDLE_ENFORCE_GE(tensor->capacity(),
size_t bytes = tensor.numel() * SizeOf(tensor.dtype());
PADDLE_ENFORCE_GE(tensor.capacity(),
bytes,
paddle::platform::errors::InvalidArgument(
"The memory size %d should be enough to meet the "
"volume required by metadata %d.",
tensor->capacity(),
tensor.capacity(),
bytes));
PADDLE_ENFORCE_GE(begin_idx,
0,
......@@ -66,7 +66,7 @@ class CompatibleDenseTensorUtils {
"But received the start index is d%.",
begin_idx));
PADDLE_ENFORCE_LE(end_idx,
tensor->dims()[0],
tensor.dims()[0],
paddle::platform::errors::OutOfRange(
"The end row index is out of bound."));
PADDLE_ENFORCE_LT(
......@@ -77,13 +77,12 @@ class CompatibleDenseTensorUtils {
"But received the start index = %d, the end index = %d.",
begin_idx,
end_idx));
DenseTensor ret =
DenseTensor(copy_intrusive(tensor->storage_), tensor->meta_);
if (tensor->dims()[0] != 1) {
DenseTensor ret(tensor);
if (tensor.dims()[0] != 1) {
ret.meta_.dims[0] = end_idx - begin_idx;
ret.meta_.offset = tensor->meta_.offset +
begin_idx * (tensor->numel() / tensor->dims()[0]) *
paddle::experimental::SizeOf(tensor->dtype());
ret.meta_.offset = tensor.meta_.offset +
begin_idx * (tensor.numel() / tensor.dims()[0]) *
paddle::experimental::SizeOf(tensor.dtype());
}
return ret;
}
......
......@@ -72,12 +72,14 @@ void* DenseTensor::mutable_data(size_t request_bytes) {
bytes));
bytes = request_bytes;
}
if (storage_->size() < bytes || storage_->size() == 0) {
if (storage_->size() < bytes + meta_.offset || storage_->size() == 0) {
VLOG(10) << "mutbale data realloc, original size: " << storage_->size()
<< ", new size: " << bytes;
storage_->Realloc(bytes);
meta_.offset = 0;
}
return storage_->data();
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
meta_.offset);
}
template <typename T>
......@@ -116,7 +118,8 @@ const void* DenseTensor::data() const {
storage_,
paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function."));
return storage_->data();
return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
meta_.offset);
}
void DenseTensor::set_meta(DenseTensorMeta&& meta) {
......
......@@ -47,7 +47,7 @@ class Storage : public intrusive_ref_counter<Storage> {
void* data() const {
return data_ ? reinterpret_cast<void*>(
reinterpret_cast<uintptr_t>(data_->ptr()) + offset_)
reinterpret_cast<uintptr_t>(data_->ptr()))
: nullptr;
}
......@@ -71,7 +71,6 @@ class Storage : public intrusive_ref_counter<Storage> {
virtual void Realloc(size_t n) = 0;
protected:
size_t offset_{0};
std::shared_ptr<paddle::memory::Allocation> data_;
};
......@@ -89,7 +88,6 @@ class TensorStorage : public Storage {
void Clear() override {
data_ = nullptr;
size_ = 0;
offset_ = 0;
}
void Realloc(size_t size) override;
......
......@@ -21,14 +21,16 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims)
DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout)
: dims(dims), dtype(dtype), layout(layout) {}
DataLayout layout,
size_t offset)
: dims(dims), dtype(dtype), layout(layout), offset(offset) {}
DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const LoD& lod)
: dims(dims), dtype(dtype), layout(layout), lod(lod) {}
const LoD& lod,
size_t offset)
: dims(dims), dtype(dtype), layout(layout), lod(lod), offset(offset) {}
bool DenseTensorMeta::valid() const noexcept {
bool valid{true};
......
......@@ -41,18 +41,20 @@ struct DenseTensorMeta {
DenseTensorMeta() = default;
DenseTensorMeta(DataType dtype, const DDim& dims);
DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout);
DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const LoD& lod);
size_t offset = 0);
DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const LoD& lod,
size_t offset = 0);
/// \brief Test whether the metadata is valid. Does not throw exceptions.
/// \return Whether the metadata is valid.
bool valid() const noexcept;
/// During the entire life cycle of a DenseTensor, the following attributes
/// marked with `const` are expected to remain unchanged.
bool is_scalar{false};
DDim dims;
DataType dtype{DataType::UNDEFINED};
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
#define CHECK_INPUT(x) \
PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
// Kernel of the custom_simple_slice op: returns a single-element vector
// holding x.slice(begin_index, end_index).
std::vector<paddle::Tensor> SimpleSliceFunction(const paddle::Tensor& x,
                                                int64_t begin_index,
                                                int64_t end_index) {
  std::vector<paddle::Tensor> outputs;
  outputs.emplace_back(x.slice(begin_index, end_index));
  return outputs;
}
// Infers the output shape of custom_simple_slice.
//
// The result is the input shape with its leading dimension replaced by
// (end_index - begin_index); all other dimensions are passed through.
//
// Checks (each raises through PD_CHECK on failure):
//   - x_shape has at least one dimension to slice along,
//   - begin_index is not negative,
//   - end_index is positive,
//   - begin_index is strictly less than end_index.
// The upper bound against x_shape[0] is intentionally not checked here.
// NOTE(review): presumably the leading dim may be unknown at infer-shape
// time; confirm before adding such a check.
std::vector<std::vector<int64_t>> SimpleSliceInferShape(
    const std::vector<int64_t>& x_shape,
    int64_t begin_index,
    int64_t end_index) {
  // Indexing out_shape[0] below would be undefined for a rank-0 shape.
  PD_CHECK(!x_shape.empty(),
           "The input must have at least one dimension to be sliced.");
  // Row 0 is a legal slice start, so only negative values are rejected.
  PD_CHECK(begin_index >= 0, "The begin index is out of bound.");
  PD_CHECK(end_index > 0, "The end index is out of bound.");
  PD_CHECK(begin_index < end_index,
           "The begin index must be less than the end index.");
  auto out_shape = x_shape;
  out_shape[0] = end_index - begin_index;
  return {out_shape};
}
// Registers the custom_simple_slice operator: one input "X", one output
// "Out", two int64_t attributes (begin_index, end_index), with
// SimpleSliceFunction as the kernel and SimpleSliceInferShape as the
// shape-inference function.
PD_BUILD_OP(custom_simple_slice)
.Inputs({"X"})
.Outputs({"Out"})
.Attrs({"begin_index: int64_t", "end_index: int64_t"})
.SetKernelFn(PD_KERNEL(SimpleSliceFunction))
.SetInferShapeFn(PD_INFER_SHAPE(SimpleSliceInferShape));
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
# Name under which the extension is JIT-built. The cached shared library lives
# in a sub-directory of the build directory carrying this same name, so the
# cleanup path below must be derived from it.
op_name = 'custom_simple_slice_jit'

# Because Windows doesn't use docker, the shared lib may already exist in the
# cache dir; it will not be compiled again unless the shared lib is removed.
file = '{}\\{}\\{}.pyd'.format(get_build_directory(), op_name, op_name)
if os.name == 'nt' and os.path.isfile(file):
    cmd = 'del {}'.format(file)
    run_cmd(cmd, True)

# JIT-compile the custom op and expose it as a Python module.
custom_ops = load(
    name=op_name,
    sources=['custom_simple_slice_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,  # test for cc flags
    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
    verbose=True)
class TestCustomSimpleSliceJit(unittest.TestCase):
    """Compares the JIT-built custom_simple_slice op with NumPy slicing."""

    def test_slice_output(self):
        """Slicing rows [2, 3) of a random 5x2 tensor must match np_x[2:3]."""
        np_x = np.random.random((5, 2)).astype("float32")
        x = paddle.to_tensor(np_x)
        custom_op_out = custom_ops.custom_simple_slice(x, 2, 3)
        np_out = np_x[2:3]
        # The format arguments follow the label order: custom op result
        # first, NumPy reference second.
        self.assertTrue(
            np.array_equal(custom_op_out, np_out),
            "custom op: {},\n numpy: {}".format(custom_op_out.numpy(), np_out))
# Run the unittest suite when this file is executed as a script.
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册