From add304ed4b419644138c45a36370a65c45612ba6 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 17 Mar 2022 10:26:16 +0800 Subject: [PATCH] Optimize the performance of C++ API (#40640) * Optimize performance * optimize c++ api performance * remove unused code * fix paddle throw * update format --- paddle/phi/api/include/tensor.h | 9 ++++++++- paddle/phi/api/lib/api_gen_utils.cc | 12 +++--------- paddle/phi/api/lib/data_transform.cc | 9 +++------ paddle/phi/api/lib/tensor.cc | 14 ++++++++++++-- paddle/phi/core/kernel_factory.h | 8 ++++++++ python/paddle/utils/code_gen/api_base.py | 2 +- 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index 1312710a80..ce40627bb0 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -324,7 +324,7 @@ class PADDLE_API Tensor final { * * @return std::shared_ptr */ - std::shared_ptr impl() const; + const std::shared_ptr& impl() const; /** * @brief Set the implemention of current Tensor. @@ -333,6 +333,13 @@ class PADDLE_API Tensor final { */ void set_impl(const std::shared_ptr& impl); + /** + * @brief Set the implemention of current Tensor. 
+ * + * @param impl + */ + void set_impl(std::shared_ptr&& impl); + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) /** * @brief Get the stream where the tensor is currently located diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index e1ebe8c646..0c11e2df65 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -95,12 +95,8 @@ paddle::optional MakeMetaTensor( /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) { - if (!out->initialized()) { - auto dense_tensor = std::make_shared( - phi::make_intrusive(phi::TransToPhiPlace(backend)), - phi::DenseTensorMeta()); - out->set_impl(dense_tensor); - return dense_tensor.get(); + if (out->impl() == nullptr) { + out->set_impl(std::make_shared()); } return static_cast(out->impl().get()); } @@ -111,9 +107,7 @@ std::vector SetKernelOutput(size_t out_size, out->reserve(out_size); std::vector results(out_size); for (size_t i = 0; i < out_size; ++i) { - auto tensor_ptr = std::make_shared( - phi::make_intrusive(phi::TransToPhiPlace(backend)), - phi::DenseTensorMeta()); + auto tensor_ptr = std::make_shared(); results[i] = tensor_ptr.get(); out->emplace_back(); out->back().set_impl(tensor_ptr); diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 79b8ac6d0b..e280ab626d 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -167,10 +167,7 @@ phi::DenseTensor TransformData(const phi::DenseTensor& tensor, if (NeedTransformPlace( out.place(), target_args_def.backend, transform_flag)) { - phi::DenseTensor result( - phi::make_intrusive( - phi::TransToPhiPlace(target_args_def.backend)), - {out.dtype(), out.dims(), out.layout()}); + phi::DenseTensor result; framework::TransDataDevice( out, phi::TransToPhiPlace(target_args_def.backend), &result); out = result; @@ -190,14 +187,14 @@ std::shared_ptr 
PrepareData( tensor_in->dtype(), target_args_def.dtype, transform_flag) && !NeedTransformLayout( tensor_in->layout(), target_args_def.layout, transform_flag))) { - return std::dynamic_pointer_cast(tensor_in); + return std::static_pointer_cast(tensor_in); } phi::DenseTensor out = TransformData(*(static_cast(tensor_in.get())), target_args_def, transform_flag); - return std::make_shared(out); + return std::make_shared(std::move(out)); } std::shared_ptr PrepareData( diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index 40174a505d..6be85d7200 100644 --- a/paddle/phi/api/lib/tensor.cc +++ b/paddle/phi/api/lib/tensor.cc @@ -46,6 +46,7 @@ limitations under the License. */ * In the future, the necessary components will be moved to the this library, * or the corresponding components will be re-implemented. */ + #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/stream/cuda_stream.h" @@ -142,7 +143,12 @@ PlaceType Tensor::place() const { } paddle::platform::Place Tensor::inner_place() const { - return ConvertExtPlaceToInnerPlace(place()); + PADDLE_ENFORCE_NOT_NULL( + impl_, + phi::errors::PermissionDenied( + "Null pointer error, the impl_ of Tensor should not be " + "Null when calling Tensor::inner_place().")); + return impl_->place(); } bool Tensor::is_cpu() const { @@ -286,12 +292,16 @@ Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const { } } -std::shared_ptr Tensor::impl() const { return impl_; } +const std::shared_ptr &Tensor::impl() const { return impl_; } void Tensor::set_impl(const std::shared_ptr &impl) { impl_ = impl; } +void Tensor::set_impl(std::shared_ptr &&impl) { + impl_ = std::move(impl); +} + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) gpuStream_t Tensor::stream() const { return platform::stream::get_current_stream(-1)->raw_stream(); diff --git a/paddle/phi/core/kernel_factory.h b/paddle/phi/core/kernel_factory.h index be91409762..e502b9cb3e 100644 
--- a/paddle/phi/core/kernel_factory.h +++ b/paddle/phi/core/kernel_factory.h @@ -197,8 +197,16 @@ class Kernel { const KernelArgsDef& args_def() const { return args_def_; } + const TensorArgDef& InputAt(size_t idx) const { + return args_def_.input_defs().at(idx); + } + TensorArgDef& InputAt(size_t idx) { return args_def_.input_defs().at(idx); } + const TensorArgDef& OutputAt(size_t idx) const { + return args_def_.output_defs().at(idx); + } + TensorArgDef& OutputAt(size_t idx) { return args_def_.output_defs().at(idx); } bool IsValid() { return fn_ != nullptr; } diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py index d91b76bb70..bf3d7b3d19 100644 --- a/python/paddle/utils/code_gen/api_base.py +++ b/python/paddle/utils/code_gen/api_base.py @@ -698,7 +698,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self self.outputs['types'], 'SetKernelOutput', code_indent, inplace_flag) api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '') return f""" -{code_indent} auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( +{code_indent} const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( {code_indent} "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}}); {code_indent} VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]"; {code_indent} VLOG(6) << "{self.api} API kernel: " << kernel; -- GitLab