未验证 提交 98c1829b 编写于 作者: Z zyfncg 提交者: GitHub

【PTen】Remove ReMakePtenDenseTensor (#39094)

* remove remake densetensor

* fix eager test error

* fix bug in eager
上级 7a1e1193
...@@ -300,12 +300,10 @@ class EagerTensor final { ...@@ -300,12 +300,10 @@ class EagerTensor final {
const auto& framework_tensor = var_.Get<LEGACY_TYPE>(); const auto& framework_tensor = var_.Get<LEGACY_TYPE>();
if (defined()) { if (defined()) {
VLOG(8) << "Sync Var to initialized tensor for: " << name(); VLOG(8) << "Sync Var to initialized tensor for: " << name();
paddle::experimental::ReMakePtenDenseTensor( static_cast<TYPE&>(*impl()) = framework_tensor;
framework_tensor, static_cast<pten::DenseTensor*>(impl().get()));
} else { } else {
VLOG(8) << "Sync Var to uninitialized tensor for: " << name(); VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
this->set_impl(std::move( this->set_impl(std::make_shared<pten::DenseTensor>(framework_tensor));
paddle::experimental::MakePtenDenseTensor(framework_tensor)));
} }
var_.Clear(); var_.Clear();
} }
......
...@@ -133,7 +133,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, ...@@ -133,7 +133,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
"is not initialized.", "is not initialized.",
i, in_name)); i, in_name));
paddle::experimental::Tensor custom_t; paddle::experimental::Tensor custom_t;
custom_t.set_impl(std::move(experimental::MakePtenDenseTensor(*x))); custom_t.set_impl(std::make_shared<pten::DenseTensor>(*x));
custom_vec_in.emplace_back(custom_t); custom_vec_in.emplace_back(custom_t);
} }
custom_vec_ins.emplace_back(custom_vec_in); custom_vec_ins.emplace_back(custom_vec_in);
...@@ -145,7 +145,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx, ...@@ -145,7 +145,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"Input tensor (%s) is not initialized.", in_name)); "Input tensor (%s) is not initialized.", in_name));
paddle::experimental::Tensor custom_in; paddle::experimental::Tensor custom_in;
custom_in.set_impl(std::move(experimental::MakePtenDenseTensor(*x))); custom_in.set_impl(std::make_shared<pten::DenseTensor>(*x));
custom_ins.emplace_back(custom_in); custom_ins.emplace_back(custom_in);
} }
} }
......
...@@ -75,12 +75,9 @@ class TestKernel : public OpKernel<float> { ...@@ -75,12 +75,9 @@ class TestKernel : public OpKernel<float> {
output->Resize(input->dims()); output->Resize(input->dims());
output->mutable_data<T>(ctx.GetPlace()); output->mutable_data<T>(ctx.GetPlace());
auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor( pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
*pt_input, *pt_input, pt_out.get(), *input, *input, output, ctx.template device_context<DeviceContext>(),
ctx.template device_context<DeviceContext>(), AddFunctor<T>()); AddFunctor<T>());
functor.Run(); functor.Run();
} }
}; };
......
...@@ -45,11 +45,8 @@ void TensorReduceFunctorImpl(const framework::Tensor& x, framework::Tensor* y, ...@@ -45,11 +45,8 @@ void TensorReduceFunctorImpl(const framework::Tensor& x, framework::Tensor* y,
gpuStream_t stream) { gpuStream_t stream) {
y->mutable_data<Ty>(x.place()); y->mutable_data<Ty>(x.place());
auto pt_x = paddle::experimental::MakePtenDenseTensor(x);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
pten::kernels::TensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>( pten::kernels::TensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>(
*pt_x.get(), pt_y.get(), transform, origin_reduce_dims, stream); x, y, transform, origin_reduce_dims, stream);
} }
} // namespace operators } // namespace operators
......
...@@ -252,9 +252,6 @@ class ReduceKernel : public framework::OpKernel<T> { ...@@ -252,9 +252,6 @@ class ReduceKernel : public framework::OpKernel<T> {
dev_ctx.GetPlace(), dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(cast_out_dtype)); static_cast<framework::proto::VarType::Type>(cast_out_dtype));
auto pt_x = paddle::experimental::MakePtenDenseTensor(*input);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
std::vector<int64_t> tmp_dims(dims.begin(), dims.end()); std::vector<int64_t> tmp_dims(dims.begin(), dims.end());
// call new kernel // call new kernel
...@@ -262,8 +259,8 @@ class ReduceKernel : public framework::OpKernel<T> { ...@@ -262,8 +259,8 @@ class ReduceKernel : public framework::OpKernel<T> {
T, Functor>( T, Functor>(
static_cast<const typename framework::ConvertToPtenContext< static_cast<const typename framework::ConvertToPtenContext<
DeviceContext>::TYPE&>(dev_ctx), DeviceContext>::TYPE&>(dev_ctx),
*pt_x.get(), reduce_all, tmp_dims, keep_dim, *input, reduce_all, tmp_dims, keep_dim,
pten::TransToPtenDataType(cast_out_dtype), pt_out.get()); pten::TransToPtenDataType(cast_out_dtype), output);
} }
}; };
template <typename DeviceContext, typename OutT, typename Functor> template <typename DeviceContext, typename OutT, typename Functor>
...@@ -724,16 +721,13 @@ class ReduceCudaKernel : public framework::OpKernel<T> { ...@@ -724,16 +721,13 @@ class ReduceCudaKernel : public framework::OpKernel<T> {
static_cast<framework::proto::VarType::Type>(input->type())); static_cast<framework::proto::VarType::Type>(input->type()));
} }
auto pt_x = paddle::experimental::MakePtenDenseTensor(*input);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
std::vector<int64_t> dims_int64{dims.begin(), dims.end()}; std::vector<int64_t> dims_int64{dims.begin(), dims.end()};
auto pt_out_dtype = pten::TransToPtenDataType( auto pt_out_dtype = pten::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype)); static_cast<framework::proto::VarType::Type>(out_dtype));
pten::Reduce<T, ReduceOp, TransformOp>(dev_ctx, *pt_x.get(), reduce_all, pten::Reduce<T, ReduceOp, TransformOp>(
dims_int64, false, pt_out_dtype, dev_ctx, *input, reduce_all, dims_int64, false, pt_out_dtype, output);
pt_out.get());
} }
}; };
#endif #endif
......
...@@ -130,7 +130,7 @@ void InitEagerTensorWithNumpyValue(EagerTensorObject* self, ...@@ -130,7 +130,7 @@ void InitEagerTensorWithNumpyValue(EagerTensorObject* self,
"Place should be one of " "Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace")); "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace"));
} }
paddle::experimental::ReMakePtenDenseTensor(temp_tensor, impl_ptr); *impl_ptr = temp_tensor;
} }
void InitEagerTensorWithEagerTensor(EagerTensorObject* self, void InitEagerTensorWithEagerTensor(EagerTensorObject* self,
...@@ -164,23 +164,10 @@ void InitEagerTensorWithFrameworkTensor(EagerTensorObject* self, ...@@ -164,23 +164,10 @@ void InitEagerTensorWithFrameworkTensor(EagerTensorObject* self,
const std::string& name) { const std::string& name) {
self->eager_tensor.set_name(name); self->eager_tensor.set_name(name);
if (place == src.place()) { if (place == src.place()) {
std::shared_ptr<pten::DenseTensor> dense_tensor = self->eager_tensor.set_impl(std::make_shared<pten::DenseTensor>(src));
std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(place),
pten::DenseTensorMeta(pten::TransToPtenDataType(src.type()),
src.dims()));
paddle::experimental::ReMakePtenDenseTensor(src, dense_tensor.get());
self->eager_tensor.set_impl(dense_tensor);
VLOG(4) << "Same place, do ShareDataWith"; VLOG(4) << "Same place, do ShareDataWith";
} else { } else {
std::shared_ptr<pten::DenseTensor> dense_tensor = auto temp = egr::EagerTensor(std::make_shared<pten::DenseTensor>(src));
std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
src.place()),
pten::DenseTensorMeta(pten::TransToPtenDataType(src.type()),
src.dims()));
paddle::experimental::ReMakePtenDenseTensor(src, dense_tensor.get());
auto temp = egr::EagerTensor(dense_tensor);
self->eager_tensor.set_impl( self->eager_tensor.set_impl(
temp.copy_to(pten::TransToPtenBackend(place), true).impl()); temp.copy_to(pten::TransToPtenBackend(place), true).impl());
VLOG(4) << "Different place, do TensorCopy"; VLOG(4) << "Different place, do TensorCopy";
......
...@@ -161,9 +161,7 @@ static PyObject* eager_api_read_next_eager_tensor_list(PyObject* self, ...@@ -161,9 +161,7 @@ static PyObject* eager_api_read_next_eager_tensor_list(PyObject* self,
auto autograd_meta = egr::EagerUtils::autograd_meta(&eager_tensor); auto autograd_meta = egr::EagerUtils::autograd_meta(&eager_tensor);
autograd_meta->SetPersistable(false); autograd_meta->SetPersistable(false);
autograd_meta->SetStopGradient(true); autograd_meta->SetStopGradient(true);
auto tmp = std::move(tensor); eager_tensor.set_impl(std::make_shared<pten::DenseTensor>(tensor));
eager_tensor.set_impl(
std::move(paddle::experimental::MakePtenDenseTensor(tmp)));
return eager_tensor; return eager_tensor;
}; };
for (auto& tensor : tensor_list) { for (auto& tensor : tensor_list) {
......
...@@ -31,57 +31,9 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) { ...@@ -31,57 +31,9 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) {
} }
} }
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
const paddle::framework::Tensor& src) {
VLOG(3) << "MakePtenDenseTensor based Tensor.";
pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()),
src.dims(),
src.layout(),
src.offset()};
if (!src.IsInitialized()) {
return std::make_unique<pten::DenseTensor>(
std::move(pten::make_intrusive<SharedStorage>(src.place())),
std::move(meta));
}
auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
}
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor( std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::Tensor& src) { const paddle::framework::Tensor& src) {
auto out = MakePtenDenseTensorBase( return std::make_unique<pten::DenseTensor>(src);
static_cast<const paddle::framework::Tensor&>(src));
SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
src.lod());
return std::move(out);
}
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
pten::DenseTensorMeta meta{
arg_def.dtype, src.dims(), src.layout(), src.offset()};
if (src.IsInitialized() &&
src.place() == pten::TransToFluidPlace(arg_def.backend)) {
auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
std::move(meta));
} else {
return std::make_unique<pten::DenseTensor>(
std::move(pten::make_intrusive<SharedStorage>(
pten::TransToFluidPlace(arg_def.backend))),
std::move(meta));
}
}
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
auto out = MakePtenDenseTensorBase(
static_cast<const paddle::framework::Tensor&>(src), arg_def);
SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
src.lod());
return std::move(out);
} }
pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src) { pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src) {
...@@ -246,83 +198,6 @@ pten::ScalarArray MakePtenScalarArrayFromVarList( ...@@ -246,83 +198,6 @@ pten::ScalarArray MakePtenScalarArrayFromVarList(
return {vector_data}; return {vector_data};
} }
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
const framework::Variable& variable, const pten::TensorArgDef& arg_def) {
auto expected_place = pten::TransToFluidPlace(arg_def.backend);
if (variable.IsType<framework::LoDTensor>()) {
const auto& tensor = variable.Get<framework::LoDTensor>();
if (tensor.IsInitialized() &&
!platform::is_same_place(tensor.place(), expected_place)) {
framework::LoDTensor tmp_tensor;
framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
return MakePtenDenseTensor(tmp_tensor);
} else {
return MakePtenDenseTensor(tensor);
}
} else if (variable.IsType<pten::SelectedRows>()) {
// TODO(chenweihang): now we don't deal with row and height
// by xiaowei's advice
const auto& tensor = variable.Get<pten::SelectedRows>();
if (!platform::is_same_place(tensor.value().place(), expected_place)) {
framework::Tensor tmp_tensor;
paddle::framework::TensorCopySync(
tensor.value(), expected_place, &tmp_tensor);
// TODO(chenweihang): adapt SelectedRows by xiaowei's design
return MakePtenDenseTensor(tmp_tensor);
} else {
return MakePtenDenseTensor(tensor.value());
}
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared input `%s` type now when call pt kernel.",
framework::ToTypeName(variable.Type())));
}
return {};
}
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
framework::Variable* variable, const pten::TensorArgDef& arg_def) {
// Call mutable_data before running the kernel, to avoid sharing the output
// from KernelContext to the original tensor
if (variable->template IsType<framework::LoDTensor>()) {
auto* tensor = variable->template GetMutable<framework::LoDTensor>();
return MakePtenDenseTensor(*tensor, arg_def);
} else if (variable->template IsType<pten::SelectedRows>()) {
auto* tensor = variable->template GetMutable<pten::SelectedRows>();
// TODO(chenweihang): adapt SelectedRows per xiaowei's design;
// here the rows and height will be lost in the output!
return MakePtenDenseTensor(tensor->value(), arg_def);
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unsupported shared output `%s` type now when call pt kernel.",
framework::ToTypeName(variable->Type())));
}
return {};
}
void MovesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
PADDLE_ENFORCE_NOT_NULL(
src,
platform::errors::InvalidArgument(
"The source DenseTensor is nullptr when move storage."));
PADDLE_ENFORCE_NOT_NULL(
dst,
platform::errors::InvalidArgument(
"The destination Tensor is nullptr when move storage."));
dst->Resize(src->dims());
dst->set_type(pten::TransToProtoVarType(src->dtype()));
auto storage = src->MoveMemoryHolder();
dst->ResetHolderWithType(storage, pten::TransToProtoVarType(src->dtype()));
dst->set_offset(src->meta().offset);
}
void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
MovesStorageBase(src, static_cast<paddle::framework::Tensor*>(dst));
SetLoD(dst->mutable_lod(), src->lod());
}
void SharesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) { void SharesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
PADDLE_ENFORCE_NOT_NULL( PADDLE_ENFORCE_NOT_NULL(
src, src,
...@@ -343,25 +218,6 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) { ...@@ -343,25 +218,6 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
SetLoD(dst->mutable_lod(), src->lod()); SetLoD(dst->mutable_lod(), src->lod());
} }
void ReMakePtenDenseTensorBase(const paddle::framework::Tensor& src,
pten::DenseTensor* dst) {
VLOG(3) << "ReMakePtenDenseTensor based Tensor.";
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
meta->dims = src.dims();
meta->dtype = pten::TransToPtenDataType(src.type());
meta->layout = src.layout();
meta->offset = src.offset();
dst->ResetHolder(src.Holder());
}
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst) {
auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
SetLoD(&meta->lod, src.lod());
ReMakePtenDenseTensorBase(static_cast<const paddle::framework::Tensor&>(src),
dst);
}
static bool IsSameAllocation(const std::shared_ptr<memory::Allocation>& a, static bool IsSameAllocation(const std::shared_ptr<memory::Allocation>& a,
const std::shared_ptr<memory::Allocation>& b) { const std::shared_ptr<memory::Allocation>& b) {
return a->ptr() == b->ptr() && a->size() == b->size() && return a->ptr() == b->ptr() && a->size() == b->size() &&
......
...@@ -45,28 +45,8 @@ pten::ScalarArray MakePtenScalarArrayFromVar( ...@@ -45,28 +45,8 @@ pten::ScalarArray MakePtenScalarArrayFromVar(
pten::ScalarArray MakePtenScalarArrayFromVarList( pten::ScalarArray MakePtenScalarArrayFromVarList(
const std::vector<framework::Variable*>& variable_list); const std::vector<framework::Variable*>& variable_list);
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
const framework::Variable& variable, const pten::TensorArgDef& arg_def);
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
framework::Variable* variable, const pten::TensorArgDef& arg_def);
void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst); void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
/**
* To improve performance while in the compatibility state, some tricky
* utility functions are added.
*
* The ReMake** function takes out the LoDTensor information and directly
* replaces it with the corresponding member of the DenseTensor to avoid
* the overhead caused by frequent construction and destruction of the
* DenseTensor.
*/
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst);
void MakeVariableFromPtenTensor(pten::DenseTensor* src, void MakeVariableFromPtenTensor(pten::DenseTensor* src,
framework::Variable* variable); framework::Variable* variable);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册