未验证 提交 98c1829b 编写于 作者: Z zyfncg 提交者: GitHub

【PTen】Remove ReMakePtenDenseTensor (#39094)

* remove ReMakePtenDenseTensor

* fix eager test error

* fix bug in eager
上级 7a1e1193
......@@ -300,12 +300,10 @@ class EagerTensor final {
const auto& framework_tensor = var_.Get<LEGACY_TYPE>();
if (defined()) {
VLOG(8) << "Sync Var to initialized tensor for: " << name();
paddle::experimental::ReMakePtenDenseTensor(
framework_tensor, static_cast<pten::DenseTensor*>(impl().get()));
static_cast<TYPE&>(*impl()) = framework_tensor;
} else {
VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
this->set_impl(std::move(
paddle::experimental::MakePtenDenseTensor(framework_tensor)));
this->set_impl(std::make_shared<pten::DenseTensor>(framework_tensor));
}
var_.Clear();
}
......
......@@ -133,7 +133,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
"is not initialized.",
i, in_name));
paddle::experimental::Tensor custom_t;
custom_t.set_impl(std::move(experimental::MakePtenDenseTensor(*x)));
custom_t.set_impl(std::make_shared<pten::DenseTensor>(*x));
custom_vec_in.emplace_back(custom_t);
}
custom_vec_ins.emplace_back(custom_vec_in);
......@@ -145,7 +145,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
platform::errors::InvalidArgument(
"Input tensor (%s) is not initialized.", in_name));
paddle::experimental::Tensor custom_in;
custom_in.set_impl(std::move(experimental::MakePtenDenseTensor(*x)));
custom_in.set_impl(std::make_shared<pten::DenseTensor>(*x));
custom_ins.emplace_back(custom_in);
}
}
......
......@@ -75,12 +75,9 @@ class TestKernel : public OpKernel<float> {
output->Resize(input->dims());
output->mutable_data<T>(ctx.GetPlace());
auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
*pt_input, *pt_input, pt_out.get(),
ctx.template device_context<DeviceContext>(), AddFunctor<T>());
*input, *input, output, ctx.template device_context<DeviceContext>(),
AddFunctor<T>());
functor.Run();
}
};
......
......@@ -45,11 +45,8 @@ void TensorReduceFunctorImpl(const framework::Tensor& x, framework::Tensor* y,
gpuStream_t stream) {
y->mutable_data<Ty>(x.place());
auto pt_x = paddle::experimental::MakePtenDenseTensor(x);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
pten::kernels::TensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>(
*pt_x.get(), pt_y.get(), transform, origin_reduce_dims, stream);
x, y, transform, origin_reduce_dims, stream);
}
} // namespace operators
......
......@@ -252,9 +252,6 @@ class ReduceKernel : public framework::OpKernel<T> {
dev_ctx.GetPlace(),
static_cast<framework::proto::VarType::Type>(cast_out_dtype));
auto pt_x = paddle::experimental::MakePtenDenseTensor(*input);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
std::vector<int64_t> tmp_dims(dims.begin(), dims.end());
// call new kernel
......@@ -262,8 +259,8 @@ class ReduceKernel : public framework::OpKernel<T> {
T, Functor>(
static_cast<const typename framework::ConvertToPtenContext<
DeviceContext>::TYPE&>(dev_ctx),
*pt_x.get(), reduce_all, tmp_dims, keep_dim,
pten::TransToPtenDataType(cast_out_dtype), pt_out.get());
*input, reduce_all, tmp_dims, keep_dim,
pten::TransToPtenDataType(cast_out_dtype), output);
}
};
template <typename DeviceContext, typename OutT, typename Functor>
......@@ -724,16 +721,13 @@ class ReduceCudaKernel : public framework::OpKernel<T> {
static_cast<framework::proto::VarType::Type>(input->type()));
}
auto pt_x = paddle::experimental::MakePtenDenseTensor(*input);
auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
std::vector<int64_t> dims_int64{dims.begin(), dims.end()};
auto pt_out_dtype = pten::TransToPtenDataType(
static_cast<framework::proto::VarType::Type>(out_dtype));
pten::Reduce<T, ReduceOp, TransformOp>(dev_ctx, *pt_x.get(), reduce_all,
dims_int64, false, pt_out_dtype,
pt_out.get());
pten::Reduce<T, ReduceOp, TransformOp>(
dev_ctx, *input, reduce_all, dims_int64, false, pt_out_dtype, output);
}
};
#endif
......
......@@ -130,7 +130,7 @@ void InitEagerTensorWithNumpyValue(EagerTensorObject* self,
"Place should be one of "
"CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace"));
}
paddle::experimental::ReMakePtenDenseTensor(temp_tensor, impl_ptr);
*impl_ptr = temp_tensor;
}
void InitEagerTensorWithEagerTensor(EagerTensorObject* self,
......@@ -164,23 +164,10 @@ void InitEagerTensorWithFrameworkTensor(EagerTensorObject* self,
const std::string& name) {
self->eager_tensor.set_name(name);
if (place == src.place()) {
std::shared_ptr<pten::DenseTensor> dense_tensor =
std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(place),
pten::DenseTensorMeta(pten::TransToPtenDataType(src.type()),
src.dims()));
paddle::experimental::ReMakePtenDenseTensor(src, dense_tensor.get());
self->eager_tensor.set_impl(dense_tensor);
self->eager_tensor.set_impl(std::make_shared<pten::DenseTensor>(src));
VLOG(4) << "Same place, do ShareDataWith";
} else {
std::shared_ptr<pten::DenseTensor> dense_tensor =
std::make_shared<pten::DenseTensor>(
pten::make_intrusive<paddle::experimental::SharedStorage>(
src.place()),
pten::DenseTensorMeta(pten::TransToPtenDataType(src.type()),
src.dims()));
paddle::experimental::ReMakePtenDenseTensor(src, dense_tensor.get());
auto temp = egr::EagerTensor(dense_tensor);
auto temp = egr::EagerTensor(std::make_shared<pten::DenseTensor>(src));
self->eager_tensor.set_impl(
temp.copy_to(pten::TransToPtenBackend(place), true).impl());
VLOG(4) << "Different place, do TensorCopy";
......
......@@ -161,9 +161,7 @@ static PyObject* eager_api_read_next_eager_tensor_list(PyObject* self,
auto autograd_meta = egr::EagerUtils::autograd_meta(&eager_tensor);
autograd_meta->SetPersistable(false);
autograd_meta->SetStopGradient(true);
auto tmp = std::move(tensor);
eager_tensor.set_impl(
std::move(paddle::experimental::MakePtenDenseTensor(tmp)));
eager_tensor.set_impl(std::make_shared<pten::DenseTensor>(tensor));
return eager_tensor;
};
for (auto& tensor : tensor_list) {
......
......@@ -31,57 +31,9 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) {
}
}
/// Builds a pten::DenseTensor whose meta mirrors `src` (dtype, dims, layout
/// and offset).
///
/// If `src` is initialized, the new tensor's SharedStorage is constructed from
/// `src.Holder()`; otherwise it is constructed from `src.place()` only.
/// LoD is not handled here.
///
/// @param src  The framework tensor to mirror.
/// @return     An owning pointer to the newly created DenseTensor.
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
    const paddle::framework::Tensor& src) {
  VLOG(3) << "MakePtenDenseTensor based Tensor.";
  pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()),
                             src.dims(),
                             src.layout(),
                             src.offset()};
  if (!src.IsInitialized()) {
    // The make_intrusive result is passed straight through; wrapping the call
    // expression in std::move added nothing.
    return std::make_unique<pten::DenseTensor>(
        pten::make_intrusive<SharedStorage>(src.place()), std::move(meta));
  }
  auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
  return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
                                             std::move(meta));
}
/// Builds a pten::DenseTensor from `src` via MakePtenDenseTensorBase and then
/// copies `src.lod()` into the new tensor's meta with SetLoD.
///
/// @param src  The framework tensor to mirror.
/// @return     An owning pointer to the newly created DenseTensor.
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
    const paddle::framework::Tensor& src) {
  // `src` already has the parameter's type, so no cast is required.
  auto out = MakePtenDenseTensorBase(src);
  SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
         src.lod());
  // Return the local by name: `return std::move(out)` would inhibit copy
  // elision, and the local is moved implicitly anyway.
  return out;
}
/// Builds a pten::DenseTensor for `src` using the dtype requested by
/// `arg_def`; dims, layout and offset are taken from `src`.
///
/// The storage is constructed from `src.Holder()` only when `src` is
/// initialized and already resides on the place that `arg_def.backend` maps
/// to. In every other case the storage is constructed from that place alone.
///
/// @param src      The framework tensor to mirror.
/// @param arg_def  Supplies the dtype and backend for the result.
/// @return         An owning pointer to the newly created DenseTensor.
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
    const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
  pten::DenseTensorMeta tensor_meta{
      arg_def.dtype, src.dims(), src.layout(), src.offset()};

  const bool reuse_holder =
      src.IsInitialized() &&
      src.place() == pten::TransToFluidPlace(arg_def.backend);
  if (!reuse_holder) {
    // Uninitialized source or place mismatch: build storage from the place.
    return std::make_unique<pten::DenseTensor>(
        std::move(pten::make_intrusive<SharedStorage>(
            pten::TransToFluidPlace(arg_def.backend))),
        std::move(tensor_meta));
  }

  auto holder_storage = pten::make_intrusive<SharedStorage>(src.Holder());
  return std::make_unique<pten::DenseTensor>(std::move(holder_storage),
                                             std::move(tensor_meta));
}
// Builds a pten::DenseTensor for `src` according to `arg_def`: delegates to the
// two-argument MakePtenDenseTensorBase, then copies `src.lod()` into the new
// tensor's meta via SetLoD.
std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
auto out = MakePtenDenseTensorBase(
static_cast<const paddle::framework::Tensor&>(src), arg_def);
SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
src.lod());
return std::move(out);
// NOTE(review): the statement below follows an unconditional return and is
// therefore unreachable as written. It looks like it belongs to a different
// revision of this function — confirm which return is intended.
return std::make_unique<pten::DenseTensor>(src);
}
pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src) {
......@@ -246,83 +198,6 @@ pten::ScalarArray MakePtenScalarArrayFromVarList(
return {vector_data};
}
/// Creates a pten tensor from the tensor held by a read-only `variable`.
///
/// - LoDTensor: if it is initialized and not on the place that
///   `arg_def.backend` maps to, it is first copied synchronously to that
///   place; otherwise it is wrapped as-is.
/// - SelectedRows: only `value()` is wrapped (copied first when it is on a
///   different place).
/// - Any other held type raises an Unimplemented error.
///
/// @param variable  The variable holding the input tensor.
/// @param arg_def   Supplies the backend that determines the expected place.
/// @return          An owning pointer to the created tensor.
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
    const framework::Variable& variable, const pten::TensorArgDef& arg_def) {
  auto target_place = pten::TransToFluidPlace(arg_def.backend);

  if (variable.IsType<framework::LoDTensor>()) {
    const auto& lod_tensor = variable.Get<framework::LoDTensor>();
    const bool needs_copy =
        lod_tensor.IsInitialized() &&
        !platform::is_same_place(lod_tensor.place(), target_place);
    if (!needs_copy) {
      return MakePtenDenseTensor(lod_tensor);
    }
    framework::LoDTensor copied;
    framework::TensorCopySync(lod_tensor, target_place, &copied);
    return MakePtenDenseTensor(copied);
  }

  if (variable.IsType<pten::SelectedRows>()) {
    // TODO(chenweihang): now we don't deal with row and height
    // by xiaowei's advice
    const auto& selected_rows = variable.Get<pten::SelectedRows>();
    if (platform::is_same_place(selected_rows.value().place(),
                                target_place)) {
      return MakePtenDenseTensor(selected_rows.value());
    }
    framework::Tensor copied;
    paddle::framework::TensorCopySync(
        selected_rows.value(), target_place, &copied);
    // TODO(chenweihang): adapt SelectedRows by xiaowei's design
    return MakePtenDenseTensor(copied);
  }

  PADDLE_THROW(platform::errors::Unimplemented(
      "Unsupported shared input `%s` type now when call pt kernel.",
      framework::ToTypeName(variable.Type())));
  return {};
}
/// Creates a pten tensor for a mutable (output) `variable` according to
/// `arg_def`.
///
/// A LoDTensor is wrapped directly; for SelectedRows only `value()` is
/// wrapped. Any other held type raises an Unimplemented error.
///
/// @param variable  The variable holding the output tensor.
/// @param arg_def   Passed through to MakePtenDenseTensor.
/// @return          An owning pointer to the created tensor.
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
    framework::Variable* variable, const pten::TensorArgDef& arg_def) {
  // mutable_data before run kernel, to avoid sharing the output from
  // KernelContext with the original tensor
  if (variable->IsType<framework::LoDTensor>()) {
    auto* lod_tensor = variable->GetMutable<framework::LoDTensor>();
    return MakePtenDenseTensor(*lod_tensor, arg_def);
  }

  if (variable->IsType<pten::SelectedRows>()) {
    auto* selected_rows = variable->GetMutable<pten::SelectedRows>();
    // TODO(chenweihang): adapt SelectedRows by xiaowei's design,
    // here the row and height will be lost in the output!
    return MakePtenDenseTensor(selected_rows->value(), arg_def);
  }

  PADDLE_THROW(platform::errors::Unimplemented(
      "Unsupported shared output `%s` type now when call pt kernel.",
      framework::ToTypeName(variable->Type())));
  return {};
}
// Moves the memory holder out of `src` into `dst` and mirrors `src`'s dims,
// dtype and offset onto `dst`. LoD is not touched here.
//
// Both pointers must be non-null; otherwise an InvalidArgument error is raised.
void MovesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
PADDLE_ENFORCE_NOT_NULL(
src,
platform::errors::InvalidArgument(
"The source DenseTensor is nullptr when move storage."));
PADDLE_ENFORCE_NOT_NULL(
dst,
platform::errors::InvalidArgument(
"The destination Tensor is nullptr when move storage."));
// Mirror shape and dtype before taking the holder away from `src`.
dst->Resize(src->dims());
dst->set_type(pten::TransToProtoVarType(src->dtype()));
// NOTE(review): `src->dtype()` and `src->meta()` are read again after the
// holder has been moved out — presumably the meta is unaffected by the move;
// confirm against DenseTensor::MoveMemoryHolder.
auto storage = src->MoveMemoryHolder();
dst->ResetHolderWithType(storage, pten::TransToProtoVarType(src->dtype()));
dst->set_offset(src->meta().offset);
}
/// Moves `src`'s storage into `dst` via MovesStorageBase (which validates both
/// pointers), then copies `src`'s LoD into `dst` with SetLoD.
void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
  // `dst` already has the pointer type MovesStorageBase expects.
  MovesStorageBase(src, dst);
  SetLoD(dst->mutable_lod(), src->lod());
}
void SharesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
PADDLE_ENFORCE_NOT_NULL(
src,
......@@ -343,25 +218,6 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
SetLoD(dst->mutable_lod(), src->lod());
}
/// Re-targets an existing DenseTensor at `src` in place: overwrites `dst`'s
/// dims, dtype, layout and offset with those of `src`, then resets `dst`'s
/// holder to `src.Holder()`. LoD is not touched here.
///
/// @param src  The framework tensor to mirror.
/// @param dst  The DenseTensor to update; it is dereferenced without a null
///             check.
void ReMakePtenDenseTensorBase(const paddle::framework::Tensor& src,
                               pten::DenseTensor* dst) {
  VLOG(3) << "ReMakePtenDenseTensor based Tensor.";
  auto& dst_meta = *pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
  dst_meta.dims = src.dims();
  dst_meta.dtype = pten::TransToPtenDataType(src.type());
  dst_meta.layout = src.layout();
  dst_meta.offset = src.offset();
  dst->ResetHolder(src.Holder());
}
/// Re-targets an existing DenseTensor at `src` in place: copies `src.lod()`
/// into `dst`'s meta first, then delegates the remaining meta fields and the
/// holder to ReMakePtenDenseTensorBase.
///
/// @param src  The framework tensor to mirror.
/// @param dst  The DenseTensor to update.
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
                           pten::DenseTensor* dst) {
  SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(dst)->lod),
         src.lod());
  // `src` already has the reference type the base function expects.
  ReMakePtenDenseTensorBase(src, dst);
}
static bool IsSameAllocation(const std::shared_ptr<memory::Allocation>& a,
const std::shared_ptr<memory::Allocation>& b) {
return a->ptr() == b->ptr() && a->size() == b->size() &&
......
......@@ -45,28 +45,8 @@ pten::ScalarArray MakePtenScalarArrayFromVar(
pten::ScalarArray MakePtenScalarArrayFromVarList(
const std::vector<framework::Variable*>& variable_list);
// Creates a pten tensor from the tensor held by a read-only `variable`.
// A LoDTensor (or the value() of a SelectedRows) that is not on the place
// `arg_def.backend` maps to is copied synchronously to that place first.
// Other held types raise an Unimplemented error.
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
const framework::Variable& variable, const pten::TensorArgDef& arg_def);
// Creates a pten tensor for a mutable (output) `variable` according to
// `arg_def`. For SelectedRows only value() is wrapped; other held types raise
// an Unimplemented error.
std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
framework::Variable* variable, const pten::TensorArgDef& arg_def);
// Moves the memory holder of `src` into `dst`, mirroring dims, dtype, offset
// and LoD. Both pointers must be non-null.
void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
// NOTE(review): presumably the sharing counterpart of MovesStorage (leaves
// `src` usable) — confirm against the definition.
void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
/**
* Compatibility helper added to reduce the cost of bridging between the
* framework tensor types and pten::DenseTensor.
*
* ReMakePtenDenseTensor writes the information held by `src` (meta fields,
* LoD and memory holder) directly into the corresponding members of an
* existing DenseTensor `dst`, which avoids the overhead of repeatedly
* constructing and destroying DenseTensor objects.
*/
void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
pten::DenseTensor* dst);
void MakeVariableFromPtenTensor(pten::DenseTensor* src,
framework::Variable* variable);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册