diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index 8b8423c6173fbf29d4e916741feff3a0302d5a06..a15b16f06a008d709f2f90b4a95d9bf734fc61b4 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -300,12 +300,10 @@ class EagerTensor final {
     const auto& framework_tensor = var_.Get();
     if (defined()) {
       VLOG(8) << "Sync Var to initialized tensor for: " << name();
-      paddle::experimental::ReMakePtenDenseTensor(
-          framework_tensor, static_cast<pten::DenseTensor*>(impl().get()));
+      static_cast<pten::DenseTensor&>(*impl()) = framework_tensor;
     } else {
       VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
-      this->set_impl(std::move(
-          paddle::experimental::MakePtenDenseTensor(framework_tensor)));
+      this->set_impl(std::make_shared<pten::DenseTensor>(framework_tensor));
     }
     var_.Clear();
   }
diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc
index fd2522b0336ff89d4edda8da731c467da135d7ba..30fbee57787a69edab11f557b72f1410868c6cc5 100644
--- a/paddle/fluid/framework/custom_operator.cc
+++ b/paddle/fluid/framework/custom_operator.cc
@@ -133,7 +133,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                                "is not initialized.",
                                i, in_name));
         paddle::experimental::Tensor custom_t;
-        custom_t.set_impl(std::move(experimental::MakePtenDenseTensor(*x)));
+        custom_t.set_impl(std::make_shared<pten::DenseTensor>(*x));
         custom_vec_in.emplace_back(custom_t);
       }
       custom_vec_ins.emplace_back(custom_vec_in);
@@ -145,7 +145,7 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
                       platform::errors::InvalidArgument(
                           "Input tensor (%s) is not initialized.", in_name));
       paddle::experimental::Tensor custom_in;
-      custom_in.set_impl(std::move(experimental::MakePtenDenseTensor(*x)));
+      custom_in.set_impl(std::make_shared<pten::DenseTensor>(*x));
       custom_ins.emplace_back(custom_in);
     }
   }
diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu
index 858688dffd8c1cf6716662d71e01279ca1386254..b364cf9b31d568777b6ba4fd26887902972e93d6 100644
--- a/paddle/fluid/framework/data_device_transform_test.cu
+++ b/paddle/fluid/framework/data_device_transform_test.cu
@@ -75,12 +75,9 @@ class TestKernel : public OpKernel {
     output->Resize(input->dims());
     output->mutable_data<T>(ctx.GetPlace());
 
-    auto pt_input = paddle::experimental::MakePtenDenseTensor(*input);
-    auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
-
     pten::funcs::TransformFunctor<AddFunctor<T>, T, DeviceContext> functor(
-        *pt_input, *pt_input, pt_out.get(),
-        ctx.template device_context<DeviceContext>(), AddFunctor<T>());
+        *input, *input, output, ctx.template device_context<DeviceContext>(),
+        AddFunctor<T>());
     functor.Run();
   }
 };
diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h
index 62486f62f66f8bf47304681f5c223c9cc2d72a3f..730a84da88b59da6e090bbab22aa66c42e2b06e6 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h
@@ -45,11 +45,8 @@ void TensorReduceFunctorImpl(const framework::Tensor& x, framework::Tensor* y,
                              gpuStream_t stream) {
   y->mutable_data<Ty>(x.place());
 
-  auto pt_x = paddle::experimental::MakePtenDenseTensor(x);
-  auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
-
   pten::kernels::TensorReduceFunctorImpl<Tx, Ty, ReduceOp, TransformOp>(
-      *pt_x.get(), pt_y.get(), transform, origin_reduce_dims, stream);
+      x, y, transform, origin_reduce_dims, stream);
 }
 
 }  // namespace operators
diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h
index 87f51e4b8002f277a50ca0af5abf1e0f43214758..4f275717bced87995e26e6d86c72d8045adb8ebf 100644
--- a/paddle/fluid/operators/reduce_ops/reduce_op.h
+++ b/paddle/fluid/operators/reduce_ops/reduce_op.h
@@ -252,9 +252,6 @@ class ReduceKernel : public framework::OpKernel {
           dev_ctx.GetPlace(),
           static_cast<framework::proto::VarType::Type>(cast_out_dtype));
 
-      auto pt_x = paddle::experimental::MakePtenDenseTensor(*input);
-      auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
-
       std::vector<int64_t> tmp_dims(dims.begin(), dims.end());
 
       // call new kernel
@@ -262,8 +259,8 @@ class ReduceKernel : public framework::OpKernel {
                    T, Functor>(
           static_cast<const typename framework::ConvertToPtenContext<DeviceContext>::TYPE&>(dev_ctx),
-          *pt_x.get(), reduce_all, tmp_dims, keep_dim,
-          pten::TransToPtenDataType(cast_out_dtype), pt_out.get());
+          *input, reduce_all, tmp_dims, keep_dim,
+          pten::TransToPtenDataType(cast_out_dtype), output);
   }
 };
 
 template
@@ -724,16 +721,13 @@ class ReduceCudaKernel : public framework::OpKernel {
           static_cast<framework::proto::VarType::Type>(input->type()));
     }
 
-    auto pt_x = paddle::experimental::MakePtenDenseTensor(*input);
-    auto pt_out = paddle::experimental::MakePtenDenseTensor(*output);
     std::vector<int64_t> dims_int64{dims.begin(), dims.end()};
 
     auto pt_out_dtype = pten::TransToPtenDataType(
        static_cast<framework::proto::VarType::Type>(out_dtype));
 
-    pten::Reduce<T, ReduceOp, TransformOp>(dev_ctx, *pt_x.get(), reduce_all,
-                                           dims_int64, false, pt_out_dtype,
-                                           pt_out.get());
+    pten::Reduce<T, ReduceOp, TransformOp>(
+        dev_ctx, *input, reduce_all, dims_int64, false, pt_out_dtype, output);
   }
 };
 #endif
diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc
index 607423d64f53478ac4428393befce815a3b9d540..0aacbe5e3251939390d108e7f0b032a76a05a935 100644
--- a/paddle/fluid/pybind/eager.cc
+++ b/paddle/fluid/pybind/eager.cc
@@ -130,7 +130,7 @@ void InitEagerTensorWithNumpyValue(EagerTensorObject* self,
         "Place should be one of "
         "CPUPlace/XPUPlace/CUDAPlace/CUDAPinnedPlace/NPUPlace"));
   }
-  paddle::experimental::ReMakePtenDenseTensor(temp_tensor, impl_ptr);
+  *impl_ptr = temp_tensor;
 }
 
 void InitEagerTensorWithEagerTensor(EagerTensorObject* self,
@@ -164,23 +164,10 @@ void InitEagerTensorWithFrameworkTensor(EagerTensorObject* self,
                                         const std::string& name) {
   self->eager_tensor.set_name(name);
   if (place == src.place()) {
-    std::shared_ptr<pten::DenseTensor> dense_tensor =
-        std::make_shared<pten::DenseTensor>(
-            pten::make_intrusive<paddle::experimental::SharedStorage>(place),
-            pten::DenseTensorMeta(pten::TransToPtenDataType(src.type()),
-                                  src.dims()));
-    paddle::experimental::ReMakePtenDenseTensor(src, dense_tensor.get());
-    self->eager_tensor.set_impl(dense_tensor);
+    self->eager_tensor.set_impl(std::make_shared<pten::DenseTensor>(src));
     VLOG(4) << "Same place, do ShareDataWith";
   } else {
-    std::shared_ptr<pten::DenseTensor> dense_tensor =
-        std::make_shared<pten::DenseTensor>(
-            pten::make_intrusive<paddle::experimental::SharedStorage>(
-                src.place()),
-            pten::DenseTensorMeta(pten::TransToPtenDataType(src.type()),
-                                  src.dims()));
-    paddle::experimental::ReMakePtenDenseTensor(src, dense_tensor.get());
-    auto temp = egr::EagerTensor(dense_tensor);
+    auto temp = egr::EagerTensor(std::make_shared<pten::DenseTensor>(src));
     self->eager_tensor.set_impl(
         temp.copy_to(pten::TransToPtenBackend(place), true).impl());
     VLOG(4) << "Different place, do TensorCopy";
diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc
index 35f091e1c8891177ca930f9ca520346d9f871dff..44fc95938392c04341d7b6ef1c1e57c93cbfc314 100644
--- a/paddle/fluid/pybind/eager_functions.cc
+++ b/paddle/fluid/pybind/eager_functions.cc
@@ -161,9 +161,7 @@ static PyObject* eager_api_read_next_eager_tensor_list(PyObject* self,
     auto autograd_meta = egr::EagerUtils::autograd_meta(&eager_tensor);
     autograd_meta->SetPersistable(false);
     autograd_meta->SetStopGradient(true);
-    auto tmp = std::move(tensor);
-    eager_tensor.set_impl(
-        std::move(paddle::experimental::MakePtenDenseTensor(tmp)));
+    eager_tensor.set_impl(std::make_shared<pten::DenseTensor>(tensor));
     return eager_tensor;
   };
   for (auto& tensor : tensor_list) {
diff --git a/paddle/pten/api/lib/utils/tensor_utils.cc b/paddle/pten/api/lib/utils/tensor_utils.cc
index 971476a55db935af616257168b2925d1a23cb603..d1441331755f20b4766f17ba9c4943cfd5cf6a5d 100644
--- a/paddle/pten/api/lib/utils/tensor_utils.cc
+++ b/paddle/pten/api/lib/utils/tensor_utils.cc
@@ -31,57 +31,9 @@ void SetLoD(DstLoD* dst, const SrcLoD& src) {
   }
 }
 
-std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
-    const paddle::framework::Tensor& src) {
-  VLOG(3) << "MakePtenDenseTensor based Tensor.";
-  pten::DenseTensorMeta meta{pten::TransToPtenDataType(src.type()),
-                             src.dims(),
-                             src.layout(),
-                             src.offset()};
-  if (!src.IsInitialized()) {
-    return std::make_unique<pten::DenseTensor>(
-        std::move(pten::make_intrusive<SharedStorage>(src.place())),
-        std::move(meta));
-  }
-  auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
-  return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
-                                             std::move(meta));
-}
-
 std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
     const paddle::framework::Tensor& src) {
-  auto out = MakePtenDenseTensorBase(
-      static_cast<const paddle::framework::Tensor&>(src));
-  SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
-         src.lod());
-  return std::move(out);
-}
-
-std::unique_ptr<pten::DenseTensor> MakePtenDenseTensorBase(
-    const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
-  pten::DenseTensorMeta meta{
-      arg_def.dtype, src.dims(), src.layout(), src.offset()};
-
-  if (src.IsInitialized() &&
-      src.place() == pten::TransToFluidPlace(arg_def.backend)) {
-    auto shared_storage = pten::make_intrusive<SharedStorage>(src.Holder());
-    return std::make_unique<pten::DenseTensor>(std::move(shared_storage),
-                                               std::move(meta));
-  } else {
-    return std::make_unique<pten::DenseTensor>(
-        std::move(pten::make_intrusive<SharedStorage>(
-            pten::TransToFluidPlace(arg_def.backend))),
-        std::move(meta));
-  }
-}
-
-std::unique_ptr<pten::DenseTensor> MakePtenDenseTensor(
-    const paddle::framework::Tensor& src, const pten::TensorArgDef& arg_def) {
-  auto out = MakePtenDenseTensorBase(
-      static_cast<const paddle::framework::Tensor&>(src), arg_def);
-  SetLoD(&(pten::CompatibleDenseTensorUtils::GetMutableMeta(out.get())->lod),
-         src.lod());
-  return std::move(out);
+  return std::make_unique<pten::DenseTensor>(src);
 }
 
 pten::Scalar MakePtenScalar(const paddle::framework::Tensor& src) {
@@ -246,83 +198,6 @@ pten::ScalarArray MakePtenScalarArrayFromVarList(
   return {vector_data};
 }
 
-std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
-    const framework::Variable& variable, const pten::TensorArgDef& arg_def) {
-  auto expected_place = pten::TransToFluidPlace(arg_def.backend);
-
-  if (variable.IsType<framework::LoDTensor>()) {
-    const auto& tensor = variable.Get<framework::LoDTensor>();
-
-    if (tensor.IsInitialized() &&
-        !platform::is_same_place(tensor.place(), expected_place)) {
-      framework::LoDTensor tmp_tensor;
-      framework::TensorCopySync(tensor, expected_place, &tmp_tensor);
-      return MakePtenDenseTensor(tmp_tensor);
-    } else {
-      return MakePtenDenseTensor(tensor);
-    }
-  } else if (variable.IsType<framework::SelectedRows>()) {
-    // TODO(chenweihang): now we don't deal with row and height
-    // by xiaowei's advice
-    const auto& tensor = variable.Get<framework::SelectedRows>();
-    if (!platform::is_same_place(tensor.value().place(), expected_place)) {
-      framework::Tensor tmp_tensor;
-      paddle::framework::TensorCopySync(
-          tensor.value(), expected_place, &tmp_tensor);
-      // TODO(chenweihang): adapt SelectedRows by xiaowei's design
-      return MakePtenDenseTensor(tmp_tensor);
-    } else {
-      return MakePtenDenseTensor(tensor.value());
-    }
-  } else {
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "Unsupported shared input `%s` type now when call pt kernel.",
-        framework::ToTypeName(variable.Type())));
-  }
-  return {};
-}
-
-std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
-    framework::Variable* variable, const pten::TensorArgDef& arg_def) {
-  // mutable_data before run kernel, to avoid share output form
-  // KernelContext to original tensor
-  if (variable->template IsType<framework::LoDTensor>()) {
-    auto* tensor = variable->template GetMutable<framework::LoDTensor>();
-    return MakePtenDenseTensor(*tensor, arg_def);
-  } else if (variable->template IsType<framework::SelectedRows>()) {
-    auto* tensor = variable->template GetMutable<framework::SelectedRows>();
-    // TODO(chenweihang): adapt SelectedRows by xiaowei's design,
-    // here the row and height will lost in output!
-    return MakePtenDenseTensor(tensor->value(), arg_def);
-  } else {
-    PADDLE_THROW(platform::errors::Unimplemented(
-        "Unsupported shared output `%s` type now when call pt kernel.",
-        framework::ToTypeName(variable->Type())));
-  }
-  return {};
-}
-
-void MovesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
-  PADDLE_ENFORCE_NOT_NULL(
-      src,
-      platform::errors::InvalidArgument(
-          "The source DenseTensor is nullptr when move storage."));
-  PADDLE_ENFORCE_NOT_NULL(
-      dst,
-      platform::errors::InvalidArgument(
-          "The destination Tensor is nullptr when move storage."));
-  dst->Resize(src->dims());
-  dst->set_type(pten::TransToProtoVarType(src->dtype()));
-  auto storage = src->MoveMemoryHolder();
-  dst->ResetHolderWithType(storage, pten::TransToProtoVarType(src->dtype()));
-  dst->set_offset(src->meta().offset);
-}
-
-void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
-  MovesStorageBase(src, static_cast<paddle::framework::Tensor*>(dst));
-  SetLoD(dst->mutable_lod(), src->lod());
-}
-
 void SharesStorageBase(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
   PADDLE_ENFORCE_NOT_NULL(
       src,
@@ -343,25 +218,6 @@ void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst) {
   SetLoD(dst->mutable_lod(), src->lod());
 }
 
-void ReMakePtenDenseTensorBase(const paddle::framework::Tensor& src,
-                               pten::DenseTensor* dst) {
-  VLOG(3) << "ReMakePtenDenseTensor based Tensor.";
-  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
-  meta->dims = src.dims();
-  meta->dtype = pten::TransToPtenDataType(src.type());
-  meta->layout = src.layout();
-  meta->offset = src.offset();
-  dst->ResetHolder(src.Holder());
-}
-
-void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
-                           pten::DenseTensor* dst) {
-  auto* meta = pten::CompatibleDenseTensorUtils::GetMutableMeta(dst);
-  SetLoD(&meta->lod, src.lod());
-  ReMakePtenDenseTensorBase(static_cast<const paddle::framework::Tensor&>(src),
-                            dst);
-}
-
 static bool IsSameAllocation(const std::shared_ptr<memory::Allocation>& a,
                              const std::shared_ptr<memory::Allocation>& b) {
   return a->ptr() == b->ptr() && a->size() == b->size() &&
diff --git a/paddle/pten/api/lib/utils/tensor_utils.h b/paddle/pten/api/lib/utils/tensor_utils.h
index 04c3f0e912b8239f9488dfe9d0f8ffe3b66cb023..32d65eded6ee3e9d350767cd96f074af4533209c 100644
--- a/paddle/pten/api/lib/utils/tensor_utils.h
+++ b/paddle/pten/api/lib/utils/tensor_utils.h
@@ -45,28 +45,8 @@ pten::ScalarArray MakePtenScalarArrayFromVar(
 pten::ScalarArray MakePtenScalarArrayFromVarList(
     const std::vector<framework::Variable*>& variable_list);
 
-std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
-    const framework::Variable& variable, const pten::TensorArgDef& arg_def);
-
-std::unique_ptr<pten::TensorBase> MakePtenTensorBaseFromVar(
-    framework::Variable* variable, const pten::TensorArgDef& arg_def);
-
-void MovesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
-
 void SharesStorage(pten::DenseTensor* src, paddle::framework::Tensor* dst);
 
-/**
- * In order to improve the compatibility state performance, some tricky tool
- * functions are added.
- *
- * The ReMake** function takes out the LoDTensor information and directly
- * replaces it with the corresponding member of the DenseTensor to avoid
- * the overhead caused by frequent construction and destruction of the
- * DenseTensor.
- */
-void ReMakePtenDenseTensor(const paddle::framework::Tensor& src,
-                           pten::DenseTensor* dst);
-
 void MakeVariableFromPtenTensor(pten::DenseTensor* src,
                                 framework::Variable* variable);
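
Reviewer note: every file touched above applies the same mechanical substitution. The MakePtenDenseTensor / ReMakePtenDenseTensor helpers are dropped in favour of pten::DenseTensor being directly constructible and assignable from a paddle::framework::Tensor, so call sites become std::make_shared<pten::DenseTensor>(src) (or std::make_unique) for fresh tensors and *dense_tensor_ptr = src for existing ones. The sketch below only illustrates the shape of that migration; FrameworkTensor, DenseTensor, and MakeDenseTensor are hypothetical stand-ins, not the real Paddle classes.

#include <memory>

// Hypothetical stand-ins for paddle::framework::Tensor and pten::DenseTensor.
// The real classes are assumed to provide the conversion constructor and
// assignment operator that the diff relies on.
struct FrameworkTensor {
  int value = 0;
};

struct DenseTensor {
  DenseTensor() = default;
  // Conversion constructor: plays the role of the removed factory helper.
  explicit DenseTensor(const FrameworkTensor& src) : value(src.value) {}
  // Assignment from the legacy type: plays the role of the removed "remake".
  DenseTensor& operator=(const FrameworkTensor& src) {
    value = src.value;
    return *this;
  }
  int value = 0;
};

// Old-style factory helper, analogous to what tensor_utils.cc used to export.
std::unique_ptr<DenseTensor> MakeDenseTensor(const FrameworkTensor& src) {
  return std::make_unique<DenseTensor>(src);
}

int main() {
  FrameworkTensor legacy;
  legacy.value = 42;

  // Before: wrap through a factory and pass raw pointers around.
  std::unique_ptr<DenseTensor> wrapped = MakeDenseTensor(legacy);

  // After: construct the tensor directly ...
  auto direct = std::make_shared<DenseTensor>(legacy);
  // ... or refresh an already-constructed one by assignment.
  DenseTensor reused;
  reused = legacy;

  return (wrapped->value == direct->value && direct->value == reused.value)
             ? 0
             : 1;
}

The design point is that the conversion logic now lives in one place, the DenseTensor constructor and assignment operator, instead of being duplicated across standalone compatibility helpers.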