未验证 提交 a504ff3f 编写于 作者: C Chen Weihang 提交者: GitHub

[PTen] Remove offset in storage (#38472)

* remove offset in storage

* revert api change

* fix custom op slice bug

* fix mutable_data error
上级 3f6229c6
...@@ -83,7 +83,7 @@ void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, ...@@ -83,7 +83,7 @@ void ScaleAPI(const egr::EagerTensor& x, float scale, float bias,
SizeOf(dense_tensor->dtype()); SizeOf(dense_tensor->dtype());
auto dense_out = std::make_shared<pten::DenseTensor>( auto dense_out = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>( pten::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size), 0), paddle::memory::Alloc(place, bytes_size)),
std::move(tensor_meta)); std::move(tensor_meta));
// Handle Device Context // Handle Device Context
const paddle::platform::Place& expected_kernel_place = const paddle::platform::Place& expected_kernel_place =
......
...@@ -41,7 +41,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { ...@@ -41,7 +41,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype()); paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<pten::DenseTensor>( auto ret_dense = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>( pten::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size), 0), paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta)); std::move(ret_meta));
float* t_ptr = t_dense->mutable_data<float>(); float* t_ptr = t_dense->mutable_data<float>();
......
...@@ -42,7 +42,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) { ...@@ -42,7 +42,7 @@ egr::EagerTensor hook_function(const egr::EagerTensor& t) {
paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype()); paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
auto ret_dense = std::make_shared<pten::DenseTensor>( auto ret_dense = std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>( pten::make_intrusive<paddle::experimental::SharedStorage>(
paddle::memory::Alloc(place, bytes_size), 0), paddle::memory::Alloc(place, bytes_size)),
std::move(ret_meta)); std::move(ret_meta));
float* t_ptr = t_dense->mutable_data<float>(); float* t_ptr = t_dense->mutable_data<float>();
......
...@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, ...@@ -207,14 +207,14 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
"Tensors.", "Tensors.",
vec_true_outs.size(), outs.size())); vec_true_outs.size(), outs.size()));
for (size_t j = 0; j < vec_true_outs.size(); ++j) { for (size_t j = 0; j < vec_true_outs.size(); ++j) {
experimental::MovesSharedStorage( experimental::SharesStorage(
std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(j).impl()) std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(j).impl())
.get(), .get(),
vec_true_outs.at(j)); vec_true_outs.at(j));
} }
} else { } else {
auto* true_out = ctx.Output<Tensor>(out_name); auto* true_out = ctx.Output<Tensor>(out_name);
experimental::MovesSharedStorage( experimental::SharesStorage(
std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(i).impl()) std::dynamic_pointer_cast<pten::DenseTensor>(outs.at(i).impl())
.get(), .get(),
true_out); true_out);
......
...@@ -273,6 +273,7 @@ class Tensor { ...@@ -273,6 +273,7 @@ class Tensor {
const std::shared_ptr<memory::Allocation>& Holder() const { return holder_; } const std::shared_ptr<memory::Allocation>& Holder() const { return holder_; }
size_t offset() const { return offset_; } size_t offset() const { return offset_; }
void set_offset(size_t offset) { offset_ = offset; }
std::shared_ptr<memory::Allocation> MoveMemoryHolder() { std::shared_ptr<memory::Allocation> MoveMemoryHolder() {
return std::move(holder_); return std::move(holder_);
......
...@@ -456,8 +456,7 @@ class ReshapeKernel { ...@@ -456,8 +456,7 @@ class ReshapeKernel {
// non-inplace need move all result from pt_out to out, inplace need set // non-inplace need move all result from pt_out to out, inplace need set
// result dims. // result dims.
if (in != out) { if (in != out) {
paddle::experimental::MovesSharedStorage(pt_out, paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
static_cast<Tensor *>(out));
} else { } else {
out->Resize(pt_out->dims()); out->Resize(pt_out->dims());
} }
......
...@@ -304,7 +304,7 @@ class PADDLE_API Tensor final { ...@@ -304,7 +304,7 @@ class PADDLE_API Tensor final {
* The index number begins from begin_idx + 1. * The index number begins from begin_idx + 1.
* @return Tensor * @return Tensor
*/ */
Tensor slice(const int64_t begin_idx, const int64_t end_idx) const; Tensor slice(int64_t begin_idx, int64_t end_idx) const;
/** /**
* @brief Return the implemention of current Tensor. * @brief Return the implemention of current Tensor.
......
...@@ -253,11 +253,11 @@ template PADDLE_API paddle::platform::float16 * ...@@ -253,11 +253,11 @@ template PADDLE_API paddle::platform::float16 *
Tensor::data<paddle::platform::float16>(); Tensor::data<paddle::platform::float16>();
// TODO(chenweihang): replace slice impl by API // TODO(chenweihang): replace slice impl by API
Tensor Tensor::slice(const int64_t begin_idx, const int64_t end_idx) const { Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const {
if (is_dense_tensor()) { if (is_dense_tensor()) {
return Tensor(std::make_shared<pten::DenseTensor>( return Tensor(std::make_shared<pten::DenseTensor>(
std::move(pten::CompatibleDenseTensorUtils::Slice( std::move(pten::CompatibleDenseTensorUtils::Slice(
std::dynamic_pointer_cast<pten::DenseTensor>(impl_).get(), *(std::dynamic_pointer_cast<pten::DenseTensor>(impl_).get()),
begin_idx, begin_idx,
end_idx)))); end_idx))));
} else { } else {
......
...@@ -37,7 +37,6 @@ class ExternalStorage : public pten::Storage { ...@@ -37,7 +37,6 @@ class ExternalStorage : public pten::Storage {
void Clear() override { void Clear() override {
data_ = nullptr; data_ = nullptr;
size_ = 0; size_ = 0;
offset_ = 0;
} }
size_t size() const noexcept override { return size_; } size_t size() const noexcept override { return size_; }
...@@ -57,13 +56,11 @@ class ExternalStorage : public pten::Storage { ...@@ -57,13 +56,11 @@ class ExternalStorage : public pten::Storage {
class SharedStorage : public pten::Storage { class SharedStorage : public pten::Storage {
public: public:
explicit SharedStorage( explicit SharedStorage(
const std::shared_ptr<paddle::memory::Allocation>& allocation, const std::shared_ptr<paddle::memory::Allocation>& allocation)
size_t offset)
: Storage(allocation) { : Storage(allocation) {
CHECK(allocation); CHECK(allocation);
place_ = allocation->place(); place_ = allocation->place();
size_ = allocation->size(); size_ = allocation->size();
offset_ = offset;
} }
// In order to be compatible with the original Tensor design and execution // In order to be compatible with the original Tensor design and execution
...@@ -84,7 +81,6 @@ class SharedStorage : public pten::Storage { ...@@ -84,7 +81,6 @@ class SharedStorage : public pten::Storage {
void Clear() override { void Clear() override {
data_ = nullptr; data_ = nullptr;
size_ = 0; size_ = 0;
offset_ = 0;
} }
size_t size() const noexcept override { return size_; } size_t size() const noexcept override { return size_; }
...@@ -96,12 +92,10 @@ class SharedStorage : public pten::Storage { ...@@ -96,12 +92,10 @@ class SharedStorage : public pten::Storage {
} }
// Temporary method: For compatible with fluid Tensor and improve performance // Temporary method: For compatible with fluid Tensor and improve performance
void ResetAllocation(std::shared_ptr<paddle::memory::Allocation> allocation, void ResetAllocation(std::shared_ptr<paddle::memory::Allocation> allocation) {
size_t offset) {
data_ = allocation; data_ = allocation;
size_ = allocation->size(); size_ = allocation->size();
place_ = allocation->place(); place_ = allocation->place();
offset_ = offset;
} }
// Temporary method: For compatible with fluid Tensor and improve performance // Temporary method: For compatible with fluid Tensor and improve performance
......
...@@ -33,39 +33,35 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) { ...@@ -33,39 +33,35 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) {
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor( std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::Tensor& src) { const paddle::framework::Tensor& src) {
VLOG(3) << "MakePtenDenseTensor based Tensor.";
pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()),
src.dims(), src.dims(),
pten::TransToPtenDataLayout(src.layout())}; pten::TransToPtenDataLayout(src.layout()),
auto shared_storage = src.offset()};
pten::make_intrusive<SharedStorage>(src.Holder(), src.offset()); auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage), return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta)); std::move(meta));
} }
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor( std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::LoDTensor& src) { const paddle::framework::LoDTensor& src) {
pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()), auto out =
src.dims(), MakePtenDenseTensor(static_cast<const paddle::framework::Tensor&>(src));
pten::TransToPtenDataLayout(src.layout())}; SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
SetLoD(&meta.lod, src.lod()); src.lod());
auto shared_storage = return std::move(out);
pten::make_intrusive<SharedStorage>(src.Holder(), src.offset());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
} }
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor( std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::Tensor& tensor, const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
const pten::TensorArgDef& arg_def) {
pten::DenseTensorMeta meta{arg_def.dtype, pten::DenseTensorMeta meta{arg_def.dtype,
tensor.dims(), src.dims(),
pten::TransToPtenDataLayout(tensor.layout())}; pten::TransToPtenDataLayout(src.layout()),
src.offset()};
if (tensor.IsInitialized() && if (src.IsInitialized() &&
tensor.place() == pten::TransToFluidPlace(arg_def.backend)) { src.place() == pten::TransToFluidPlace(arg_def.backend)) {
auto shared_storage = auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
pten::make_intrusive<SharedStorage>(tensor.Holder(), tensor.offset());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage), return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta)); std::move(meta));
} else { } else {
...@@ -77,25 +73,13 @@ std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor( ...@@ -77,25 +73,13 @@ std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
} }
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor( std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::LoDTensor& tensor, const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def) { const pten::TensorArgDef& arg_def) {
pten::DenseTensorMeta meta{arg_def.dtype, auto out = MakePtenDenseTensor(
tensor.dims(), static_cast<const paddle::framework::Tensor&>(src), arg_def);
pten::TransToPtenDataLayout(tensor.layout()), SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
pten::TransToPtenLoD(tensor.lod())}; src.lod());
return std::move(out);
if (tensor.IsInitialized() &&
tensor.place() == pten::TransToFluidPlace(arg_def.backend)) {
auto shared_storage =
pten::make_intrusive<SharedStorage>(tensor.Holder(), tensor.offset());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
} else {
return std::make_unique<pten::DenseTensor>(
std::move(pten::make_intrusive<SharedStorage>(
pten::TransToFluidPlace(arg_def.backend))),
std::move(meta));
}
} }
pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src) { pten::Scalar MakePtenScalar(const paddle::framework::LoDTensor& src) {
...@@ -328,23 +312,15 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { ...@@ -328,23 +312,15 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
std::shared_ptr<paddle::memory::allocation::Allocation> holder( std::shared_ptr<paddle::memory::allocation::Allocation> holder(
new TensorStorage(std::move(storage))); new TensorStorage(std::move(storage)));
dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype())); dst->ResetHolderWithType(holder, pten::TransToProtoVarType(src->dtype()));
dst->set_offset(src->meta().offset);
} }
void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) { void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) {
PADDLE_ENFORCE_NOT_NULL(
src,
platform::errors::InvalidArgument(
"The source DenseTensor is nullptr when move storage."));
PADDLE_ENFORCE_NOT_NULL(
dst,
platform::errors::InvalidArgument(
"The destination LoDTensor is nullptr when move storage."));
SetLoD(dst->mutable_lod(), src->lod());
MovesStorage(src, static_cast<paddle::framework::Tensor*>(dst)); MovesStorage(src, static_cast<paddle::framework::Tensor*>(dst));
SetLoD(dst->mutable_lod(), src->lod());
} }
void MovesSharedStorage(pten::DenseTensor* src, void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
paddle::framework::Tensor* dst) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
src, src,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
...@@ -358,24 +334,22 @@ void MovesSharedStorage(pten::DenseTensor* src, ...@@ -358,24 +334,22 @@ void MovesSharedStorage(pten::DenseTensor* src,
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(src)); pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(src));
dst->ResetHolderWithType(storage->GetAllocation(), dst->ResetHolderWithType(storage->GetAllocation(),
pten::TransToProtoVarType(src->dtype())); pten::TransToProtoVarType(src->dtype()));
dst->set_offset(src->meta().offset);
} }
void MovesSharedStorage(pten::DenseTensor* src, void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst) {
paddle::framework::LoDTensor* dst) { SharesStorage(src, static_cast<paddle::framework::Tensor*>(dst));
MovesSharedStorage(src, static_cast<paddle::framework::Tensor*>(dst));
SetLoD(dst->mutable_lod(), src->lod()); SetLoD(dst->mutable_lod(), src->lod());
} }
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) { pten::DenseTensor* dst) {
VLOG(3) << "ReMakePtenDenseTensor based Tensor.";
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims(); meta->dims = src.dims();
// Since the type of DenseTensorMeta is const, const_cast must be used meta->dtype = pten::TransToPtenDataType(src.type());
const_cast<DataType&>(meta->dtype) = arg_def.dtype; meta->layout = pten::TransToPtenDataLayout(src.layout());
// Since the type of DenseTensorMeta is const, const_cast must be used meta->offset = src.offset();
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
auto* shared_storage = static_cast<SharedStorage*>( auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
...@@ -384,42 +358,30 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, ...@@ -384,42 +358,30 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
platform::errors::NotFound( platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr.")); "Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized()) { PADDLE_ENFORCE_EQ(src.IsInitialized(),
shared_storage->ResetAllocation(src.Holder(), src.offset()); true,
} paddle::platform::errors::InvalidArgument(
"Source Tensor is not initialized."));
shared_storage->ResetAllocation(src.Holder());
} }
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
pten::DenseTensor* dst) { pten::DenseTensor* dst) {
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims(); SetLoD(&meta->lod, src.lod());
// Since the type of DenseTensorMeta is const, const_cast must be used ReMakePtenDenseTensor(static_cast<const paddle::framework::Tensor&>(src),
const_cast<DataType&>(meta->dtype) = pten::TransToPtenDataType(src.type()); dst);
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
PADDLE_ENFORCE_NOT_NULL(
shared_storage,
platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized()) {
shared_storage->ResetAllocation(src.Holder(), src.offset());
}
} }
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
pten::DenseTensor* dst) { const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
VLOG(3) << "ReMakePtenDenseTensor based Tensor and TensorArgDef.";
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst); auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims(); meta->dims = src.dims();
// Since the type of DenseTensorMeta is const, const_cast must be used meta->dtype = arg_def.dtype;
const_cast<DataType&>(meta->dtype) = pten::TransToPtenDataType(src.type()); meta->layout = pten::TransToPtenDataLayout(src.layout());
// Since the type of DenseTensorMeta is const, const_cast must be used meta->offset = src.offset();
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
auto* shared_storage = static_cast<SharedStorage*>( auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst)); pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
...@@ -428,38 +390,24 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, ...@@ -428,38 +390,24 @@ void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
platform::errors::NotFound( platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr.")); "Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized()) {
shared_storage->ResetAllocation(src.Holder(), src.offset());
}
}
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims();
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataType&>(meta->dtype) = arg_def.dtype;
// Since the type of DenseTensorMeta is const, const_cast must be used
const_cast<DataLayout&>(meta->layout) =
pten::TransToPtenDataLayout(src.layout());
SetLoD(&(meta->lod), src.lod());
auto* shared_storage = static_cast<SharedStorage*>(
pten::CompatibleDenseTensorUtils::UnsafeGetMutableStorage(dst));
PADDLE_ENFORCE_NOT_NULL(
shared_storage,
platform::errors::NotFound(
"Target DenseTensor's shared storage is nullptr."));
if (src.IsInitialized() && if (src.IsInitialized() &&
src.place() == pten::TransToFluidPlace(arg_def.backend)) { src.place() == pten::TransToFluidPlace(arg_def.backend)) {
shared_storage->ResetAllocation(src.Holder(), src.offset()); shared_storage->ResetAllocation(src.Holder());
} else { } else {
shared_storage->ResetAllocationPlace( shared_storage->ResetAllocationPlace(
pten::TransToFluidPlace(arg_def.backend)); pten::TransToFluidPlace(arg_def.backend));
} }
} }
void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) {
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
SetLoD(&meta->lod, src.lod());
ReMakePtenDenseTensorByArgDef(
static_cast<const paddle::framework::Tensor&>(src), arg_def, dst);
}
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
const pten::TensorArgDef& arg_def, const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst) { pten::DenseTensor* dst) {
...@@ -475,9 +423,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, ...@@ -475,9 +423,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
if (!platform::is_same_place(tensor.place(), expected_place)) { if (!platform::is_same_place(tensor.place(), expected_place)) {
framework::LoDTensor tmp_tensor; framework::LoDTensor tmp_tensor;
framework::TensorCopySync(tensor, expected_place, &tmp_tensor); framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
ReMakePtenDenseTensor(tmp_tensor, arg_def, dst); ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
} else { } else {
ReMakePtenDenseTensor(tensor, arg_def, dst); ReMakePtenDenseTensorByArgDef(tensor, arg_def, dst);
} }
} else if (variable.IsType<framework::SelectedRows>()) { } else if (variable.IsType<framework::SelectedRows>()) {
// TODO(chenweihang): now we don't deal with row and height // TODO(chenweihang): now we don't deal with row and height
...@@ -492,9 +440,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, ...@@ -492,9 +440,9 @@ void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
framework::Tensor tmp_tensor; framework::Tensor tmp_tensor;
TensorCopySync(tensor.value(), expected_place, &tmp_tensor); TensorCopySync(tensor.value(), expected_place, &tmp_tensor);
// TODO(chenweihang): adapt SelectedRows by xiaowei's design // TODO(chenweihang): adapt SelectedRows by xiaowei's design
ReMakePtenDenseTensor(tmp_tensor, arg_def, dst); ReMakePtenDenseTensorByArgDef(tmp_tensor, arg_def, dst);
} else { } else {
ReMakePtenDenseTensor(tensor.value(), arg_def, dst); ReMakePtenDenseTensorByArgDef(tensor.value(), arg_def, dst);
} }
} else { } else {
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(platform::errors::Unimplemented(
...@@ -510,12 +458,12 @@ void ReMakePtenDenseTensorFromVar(framework::Variable* variable, ...@@ -510,12 +458,12 @@ void ReMakePtenDenseTensorFromVar(framework::Variable* variable,
// KernelContext to original tensor // KernelContext to original tensor
if (variable->template IsType<framework::LoDTensor>()) { if (variable->template IsType<framework::LoDTensor>()) {
auto* tensor = variable->template GetMutable<framework::LoDTensor>(); auto* tensor = variable->template GetMutable<framework::LoDTensor>();
ReMakePtenDenseTensor(*tensor, arg_def, dst); ReMakePtenDenseTensorByArgDef(*tensor, arg_def, dst);
} else if (variable->template IsType<framework::SelectedRows>()) { } else if (variable->template IsType<framework::SelectedRows>()) {
auto* tensor = variable->template GetMutable<framework::SelectedRows>(); auto* tensor = variable->template GetMutable<framework::SelectedRows>();
// TODO(chenweihang): adapt SelectedRows by xiaowei's design, // TODO(chenweihang): adapt SelectedRows by xiaowei's design,
// here the row and height will lost in output! // here the row and height will lost in output!
ReMakePtenDenseTensor(tensor->value(), arg_def, dst); ReMakePtenDenseTensorByArgDef(tensor->value(), arg_def, dst);
} else { } else {
PADDLE_THROW(platform::errors::Unimplemented( PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared output `%s` type now when call pt kernel.", "Unsupported shared output `%s` type now when call pt kernel.",
......
...@@ -58,10 +58,9 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); ...@@ -58,10 +58,9 @@ void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst); void MovesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst);
void MovesSharedStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void MovesSharedStorage(pten::DenseTensor* src, void SharesStorage(pten::DenseTensor* src, paddle::framework::LoDTensor* dst);
paddle::framework::LoDTensor* dst);
/** /**
* In order to improve the compatibility state performance, some tricky tool * In order to improve the compatibility state performance, some tricky tool
...@@ -72,20 +71,20 @@ void MovesSharedStorage(pten::DenseTensor* src, ...@@ -72,20 +71,20 @@ void MovesSharedStorage(pten::DenseTensor* src,
* the overhead caused by frequent construction and destruction of the * the overhead caused by frequent construction and destruction of the
* DenseTensor. * DenseTensor.
*/ */
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
pten::DenseTensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src, void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst); pten::DenseTensor* dst);
void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src, void ReMakePtenDenseTensor(const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst); pten::DenseTensor* dst);
void ReMakePtenDenseTensorByArgDef(const paddle::framework::Tensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorByArgDef(const paddle::framework::LoDTensor& src,
const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst);
void ReMakePtenDenseTensorFromVar(const framework::Variable& variable, void ReMakePtenDenseTensorFromVar(const framework::Variable& variable,
const pten::TensorArgDef& arg_def, const pten::TensorArgDef& arg_def,
pten::DenseTensor* dst); pten::DenseTensor* dst);
......
...@@ -48,16 +48,16 @@ class CompatibleDenseTensorUtils { ...@@ -48,16 +48,16 @@ class CompatibleDenseTensorUtils {
} }
} }
static DenseTensor Slice(DenseTensor* tensor, static DenseTensor Slice(const DenseTensor& tensor,
int64_t begin_idx, int64_t begin_idx,
int64_t end_idx) { int64_t end_idx) {
size_t bytes = tensor->numel() * SizeOf(tensor->dtype()); size_t bytes = tensor.numel() * SizeOf(tensor.dtype());
PADDLE_ENFORCE_GE(tensor->capacity(), PADDLE_ENFORCE_GE(tensor.capacity(),
bytes, bytes,
paddle::platform::errors::InvalidArgument( paddle::platform::errors::InvalidArgument(
"The memory size %d should be enough to meet the " "The memory size %d should be enough to meet the "
"volume required by metadata %d.", "volume required by metadata %d.",
tensor->capacity(), tensor.capacity(),
bytes)); bytes));
PADDLE_ENFORCE_GE(begin_idx, PADDLE_ENFORCE_GE(begin_idx,
0, 0,
...@@ -66,7 +66,7 @@ class CompatibleDenseTensorUtils { ...@@ -66,7 +66,7 @@ class CompatibleDenseTensorUtils {
"But received the start index is d%.", "But received the start index is d%.",
begin_idx)); begin_idx));
PADDLE_ENFORCE_LE(end_idx, PADDLE_ENFORCE_LE(end_idx,
tensor->dims()[0], tensor.dims()[0],
paddle::platform::errors::OutOfRange( paddle::platform::errors::OutOfRange(
"The end row index is out of bound.")); "The end row index is out of bound."));
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
...@@ -77,13 +77,12 @@ class CompatibleDenseTensorUtils { ...@@ -77,13 +77,12 @@ class CompatibleDenseTensorUtils {
"But received the start index = %d, the end index = %d.", "But received the start index = %d, the end index = %d.",
begin_idx, begin_idx,
end_idx)); end_idx));
DenseTensor ret = DenseTensor ret(tensor);
DenseTensor(copy_intrusive(tensor->storage_), tensor->meta_); if (tensor.dims()[0] != 1) {
if (tensor->dims()[0] != 1) {
ret.meta_.dims[0] = end_idx - begin_idx; ret.meta_.dims[0] = end_idx - begin_idx;
ret.meta_.offset = tensor->meta_.offset + ret.meta_.offset = tensor.meta_.offset +
begin_idx * (tensor->numel() / tensor->dims()[0]) * begin_idx * (tensor.numel() / tensor.dims()[0]) *
paddle::experimental::SizeOf(tensor->dtype()); paddle::experimental::SizeOf(tensor.dtype());
} }
return ret; return ret;
} }
......
...@@ -72,12 +72,14 @@ void* DenseTensor::mutable_data(size_t request_bytes) { ...@@ -72,12 +72,14 @@ void* DenseTensor::mutable_data(size_t request_bytes) {
bytes)); bytes));
bytes = request_bytes; bytes = request_bytes;
} }
if (storage_->size() < bytes || storage_->size() == 0) { if (storage_->size() < bytes + meta_.offset || storage_->size() == 0) {
VLOG(10) << "mutbale data realloc, original size: " << storage_->size() VLOG(10) << "mutbale data realloc, original size: " << storage_->size()
<< ", new size: " << bytes; << ", new size: " << bytes;
storage_->Realloc(bytes); storage_->Realloc(bytes);
meta_.offset = 0;
} }
return storage_->data(); return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
meta_.offset);
} }
template <typename T> template <typename T>
...@@ -116,7 +118,8 @@ const void* DenseTensor::data() const { ...@@ -116,7 +118,8 @@ const void* DenseTensor::data() const {
storage_, storage_,
paddle::platform::errors::PreconditionNotMet( paddle::platform::errors::PreconditionNotMet(
"The storage must be valid when call the mutable data function.")); "The storage must be valid when call the mutable data function."));
return storage_->data(); return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(storage_->data()) +
meta_.offset);
} }
void DenseTensor::set_meta(DenseTensorMeta&& meta) { void DenseTensor::set_meta(DenseTensorMeta&& meta) {
......
...@@ -47,7 +47,7 @@ class Storage : public intrusive_ref_counter<Storage> { ...@@ -47,7 +47,7 @@ class Storage : public intrusive_ref_counter<Storage> {
void* data() const { void* data() const {
return data_ ? reinterpret_cast<void*>( return data_ ? reinterpret_cast<void*>(
reinterpret_cast<uintptr_t>(data_->ptr()) + offset_) reinterpret_cast<uintptr_t>(data_->ptr()))
: nullptr; : nullptr;
} }
...@@ -71,7 +71,6 @@ class Storage : public intrusive_ref_counter<Storage> { ...@@ -71,7 +71,6 @@ class Storage : public intrusive_ref_counter<Storage> {
virtual void Realloc(size_t n) = 0; virtual void Realloc(size_t n) = 0;
protected: protected:
size_t offset_{0};
std::shared_ptr<paddle::memory::Allocation> data_; std::shared_ptr<paddle::memory::Allocation> data_;
}; };
...@@ -89,7 +88,6 @@ class TensorStorage : public Storage { ...@@ -89,7 +88,6 @@ class TensorStorage : public Storage {
void Clear() override { void Clear() override {
data_ = nullptr; data_ = nullptr;
size_ = 0; size_ = 0;
offset_ = 0;
} }
void Realloc(size_t size) override; void Realloc(size_t size) override;
......
...@@ -21,14 +21,16 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims) ...@@ -21,14 +21,16 @@ DenseTensorMeta::DenseTensorMeta(DataType dtype, const DDim& dims)
DenseTensorMeta::DenseTensorMeta(DataType dtype, DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims, const DDim& dims,
DataLayout layout) DataLayout layout,
: dims(dims), dtype(dtype), layout(layout) {} size_t offset)
: dims(dims), dtype(dtype), layout(layout), offset(offset) {}
DenseTensorMeta::DenseTensorMeta(DataType dtype, DenseTensorMeta::DenseTensorMeta(DataType dtype,
const DDim& dims, const DDim& dims,
DataLayout layout, DataLayout layout,
const LoD& lod) const LoD& lod,
: dims(dims), dtype(dtype), layout(layout), lod(lod) {} size_t offset)
: dims(dims), dtype(dtype), layout(layout), lod(lod), offset(offset) {}
bool DenseTensorMeta::valid() const noexcept { bool DenseTensorMeta::valid() const noexcept {
bool valid{true}; bool valid{true};
......
...@@ -41,18 +41,20 @@ struct DenseTensorMeta { ...@@ -41,18 +41,20 @@ struct DenseTensorMeta {
DenseTensorMeta() = default; DenseTensorMeta() = default;
DenseTensorMeta(DataType dtype, const DDim& dims); DenseTensorMeta(DataType dtype, const DDim& dims);
DenseTensorMeta(DataType dtype, const DDim& dims, DataLayout layout);
DenseTensorMeta(DataType dtype, DenseTensorMeta(DataType dtype,
const DDim& dims, const DDim& dims,
DataLayout layout, DataLayout layout,
const LoD& lod); size_t offset = 0);
DenseTensorMeta(DataType dtype,
const DDim& dims,
DataLayout layout,
const LoD& lod,
size_t offset = 0);
/// \brief Test whether the metadata is valid. Does not throw exceptions. /// \brief Test whether the metadata is valid. Does not throw exceptions.
/// \return Whether the metadata is valid. /// \return Whether the metadata is valid.
bool valid() const noexcept; bool valid() const noexcept;
/// During the entire life cycle of a DenseTensor, the following attributes
/// marked with `const` are expected to remain unchanged.
bool is_scalar{false}; bool is_scalar{false};
DDim dims; DDim dims;
DataType dtype{DataType::UNDEFINED}; DataType dtype{DataType::UNDEFINED};
......
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
// Guard macro for CPU-only custom kernels: fails with a clear message unless
// tensor `x` resides on CPU. PD_CHECK comes from paddle/extension.h.
// NOTE(review): currently unused in this file — kept for consistency with the
// other custom-op test sources.
#define CHECK_INPUT(x) \
  PD_CHECK(x.place() == paddle::PlaceType::kCPU, #x " must be a CPU Tensor.")
// Forward kernel for custom_simple_slice: returns the rows of `x` in
// [begin_index, end_index) as the op's single output, delegating the actual
// slicing to Tensor::slice.
std::vector<paddle::Tensor> SimpleSliceFunction(const paddle::Tensor& x,
                                                int64_t begin_index,
                                                int64_t end_index) {
  auto sliced = x.slice(begin_index, end_index);
  return {sliced};
}
// InferShape for custom_simple_slice: the output keeps x's shape except that
// dim 0 becomes the slice length (end_index - begin_index).
//
// Fixes: begin_index == 0 is a valid slice start, so the lower-bound check is
// `>= 0` (the previous strict `> 0` wrongly rejected slicing from the first
// row); the second check's message was garbled ("must is out of bound").
// The upper bound is deliberately not validated here: at infer-shape time
// x_shape[0] may be dynamic (-1) — TODO confirm against the runtime kernel.
std::vector<std::vector<int64_t>> SimpleSliceInferShape(
    const std::vector<int64_t>& x_shape,
    int64_t begin_index,
    int64_t end_index) {
  PD_CHECK(begin_index >= 0, "The begin index is out of bound.");
  PD_CHECK(end_index > 0, "The end index is out of bound.");
  PD_CHECK(begin_index < end_index,
           "The begin index is greater than end index.");
  auto out_shape = x_shape;
  out_shape[0] = end_index - begin_index;
  return {out_shape};
}
// Register the operator with paddle's custom-op machinery: one input "X",
// one output "Out", two int64_t slice-boundary attributes, the compute
// kernel SimpleSliceFunction, and SimpleSliceInferShape for static shapes.
PD_BUILD_OP(custom_simple_slice)
    .Inputs({"X"})
    .Outputs({"Out"})
    .Attrs({"begin_index: int64_t", "end_index: int64_t"})
    .SetKernelFn(PD_KERNEL(SimpleSliceFunction))
    .SetInferShapeFn(PD_INFER_SHAPE(SimpleSliceInferShape));
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
# On Windows there is no docker isolation, so a previously built shared
# library may still sit in the build cache; JIT compilation would silently
# reuse it unless the stale artifact is deleted first.
cached_lib = '{}\\custom_simple_slice\\custom_simple_slice.pyd'.format(
    get_build_directory())
if os.name == 'nt' and os.path.isfile(cached_lib):
    run_cmd('del {}'.format(cached_lib), True)

# JIT-compile the custom op source and expose it as a Python module.
custom_ops = load(
    name='custom_simple_slice_jit',
    sources=['custom_simple_slice_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_cc_args,  # test for cc flags
    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
    verbose=True)
class TestCustomSimpleSliceJit(unittest.TestCase):
    def test_slice_output(self):
        """custom_simple_slice(x, 2, 3) must equal numpy slicing np_x[2:3]."""
        np_x = np.random.random((5, 2)).astype("float32")
        x = paddle.to_tensor(np_x)
        custom_op_out = custom_ops.custom_simple_slice(x, 2, 3)
        np_out = np_x[2:3]
        # Bug fix: the format arguments were swapped relative to their labels,
        # so a failure message would have printed the numpy result under
        # "custom op" and vice versa.
        self.assertTrue(
            np.array_equal(custom_op_out, np_out),
            "custom op: {},\n numpy: {}".format(custom_op_out.numpy(), np_out))
# Standard unittest entry point when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册