Unverified commit add304ed, authored by zyfncg, committed by GitHub

Optimize the performance of C++ API (#40640)

* Optimize performance

* Optimize C++ API performance

* Remove unused code

* Fix paddle throw

* Update format

Parent commit: c1931beb
@@ -324,7 +324,7 @@ class PADDLE_API Tensor final {
    *
    * @return std::shared_ptr<phi::TensorBase>
    */
-  std::shared_ptr<phi::TensorBase> impl() const;
+  const std::shared_ptr<phi::TensorBase>& impl() const;
 
   /**
    * @brief Set the implementation of the current Tensor.
@@ -333,6 +333,13 @@ class PADDLE_API Tensor final {
    */
   void set_impl(const std::shared_ptr<phi::TensorBase>& impl);
 
+  /**
+   * @brief Set the implementation of the current Tensor.
+   *
+   * @param impl
+   */
+  void set_impl(std::shared_ptr<phi::TensorBase>&& impl);
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
   /**
    * @brief Get the stream where the tensor is currently located
...
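Both changes in this hunk target shared_ptr traffic: returning `const std::shared_ptr<phi::TensorBase>&` skips the atomic reference-count increment that a by-value return pays on every `impl()` call, and the new rvalue-reference overload of `set_impl` lets a caller hand over ownership with no ref-count churn at all. A minimal sketch of both idioms, using toy stand-in types rather than the Paddle sources:

```cpp
#include <memory>
#include <utility>

// Toy stand-ins; the real types are phi::TensorBase and paddle::Tensor.
struct TensorBase {};

class Tensor {
 public:
  // Const-reference return: no shared_ptr copy, no atomic ref-count bump.
  const std::shared_ptr<TensorBase>& impl() const { return impl_; }

  // Copy overload: shares ownership with the caller (+1 on the ref count).
  void set_impl(const std::shared_ptr<TensorBase>& impl) { impl_ = impl; }

  // Move overload: steals the caller's pointer, no ref-count traffic.
  void set_impl(std::shared_ptr<TensorBase>&& impl) { impl_ = std::move(impl); }

 private:
  std::shared_ptr<TensorBase> impl_;
};

int main() {
  Tensor t;
  t.set_impl(std::make_shared<TensorBase>());  // rvalue -> move overload
  const auto& impl = t.impl();                 // borrows, does not copy
  return impl ? 0 : 1;
}
```

The move overload pays off mainly in paths that create a fresh tensor and immediately install it, which is exactly what the `SetKernelOutput` changes below do.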
@@ -95,12 +95,8 @@ paddle::optional<phi::MetaTensor> MakeMetaTensor(
 /* ------------------ for output ----------------------- */
 
 phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) {
-  if (!out->initialized()) {
-    auto dense_tensor = std::make_shared<phi::DenseTensor>(
-        phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
-        phi::DenseTensorMeta());
-    out->set_impl(dense_tensor);
-    return dense_tensor.get();
+  if (out->impl() == nullptr) {
+    out->set_impl(std::make_shared<phi::DenseTensor>());
   }
   return static_cast<phi::DenseTensor*>(out->impl().get());
 }
@@ -111,9 +107,7 @@ std::vector<phi::DenseTensor*> SetKernelOutput(size_t out_size,
   out->reserve(out_size);
   std::vector<phi::DenseTensor*> results(out_size);
   for (size_t i = 0; i < out_size; ++i) {
-    auto tensor_ptr = std::make_shared<phi::DenseTensor>(
-        phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
-        phi::DenseTensorMeta());
+    auto tensor_ptr = std::make_shared<phi::DenseTensor>();
     results[i] = tensor_ptr.get();
     out->emplace_back();
     out->back().set_impl(tensor_ptr);
...
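Both `SetKernelOutput` overloads drop the eager `SharedStorage` allocation: a default-constructed `phi::DenseTensor` is a valid empty placeholder, and the actual buffer is allocated only when the kernel runs (the `TransformData` hunk below applies the same idea). A rough sketch of the pattern, with hypothetical toy types standing in for the phi classes:

```cpp
#include <memory>
#include <vector>

// Hypothetical stand-ins for phi::DenseTensor / paddle::Tensor.
struct DenseTensor {
  std::vector<float> data;  // storage is allocated lazily, not up front
};

struct Tensor {
  std::shared_ptr<DenseTensor> impl;
};

// Create the cheapest possible placeholder now; whoever runs the kernel
// allocates (and shapes) the output when it actually produces data.
DenseTensor* SetKernelOutput(Tensor* out) {
  if (out->impl == nullptr) {
    out->impl = std::make_shared<DenseTensor>();
  }
  return out->impl.get();
}

int main() {
  Tensor t;
  DenseTensor* raw = SetKernelOutput(&t);
  raw->data.resize(8);  // stands in for the kernel's own allocation
  return raw->data.size() == 8 ? 0 : 1;
}
```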
@@ -167,10 +167,7 @@ phi::DenseTensor TransformData(const phi::DenseTensor& tensor,
   if (NeedTransformPlace(
           out.place(), target_args_def.backend, transform_flag)) {
-    phi::DenseTensor result(
-        phi::make_intrusive<paddle::experimental::SharedStorage>(
-            phi::TransToPhiPlace(target_args_def.backend)),
-        {out.dtype(), out.dims(), out.layout()});
+    phi::DenseTensor result;
     framework::TransDataDevice(
         out, phi::TransToPhiPlace(target_args_def.backend), &result);
     out = result;
@@ -190,14 +187,14 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
           tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
       !NeedTransformLayout(
           tensor_in->layout(), target_args_def.layout, transform_flag))) {
-    return std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in);
+    return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
   }
   phi::DenseTensor out =
       TransformData(*(static_cast<phi::DenseTensor*>(tensor_in.get())),
                     target_args_def,
                     transform_flag);
-  return std::make_shared<phi::DenseTensor>(out);
+  return std::make_shared<phi::DenseTensor>(std::move(out));
 }
 
 std::shared_ptr<phi::DenseTensor> PrepareData(
...
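Two separate wins in this hunk. `std::dynamic_pointer_cast` runs an RTTI check on every call; since this branch is only reached when `tensor_in` is already known to hold a `phi::DenseTensor`, `std::static_pointer_cast` gets the same result for free. And `std::make_shared<phi::DenseTensor>(std::move(out))` move-constructs the returned tensor instead of copying the local `out`. A self-contained illustration of both, in generic standard C++ rather than the Paddle types:

```cpp
#include <cassert>
#include <memory>
#include <vector>

struct Base { virtual ~Base() = default; };
struct Derived : Base { std::vector<float> data; };

int main() {
  std::shared_ptr<Base> in = std::make_shared<Derived>();

  // dynamic_pointer_cast pays for an RTTI check on every call...
  auto checked = std::dynamic_pointer_cast<Derived>(in);
  // ...while static_pointer_cast is a compile-time conversion, safe here
  // because the surrounding logic already guarantees the concrete type.
  auto unchecked = std::static_pointer_cast<Derived>(in);
  assert(checked.get() == unchecked.get());

  // make_shared<T>(std::move(x)) move-constructs the new object, stealing
  // x's buffers instead of duplicating them.
  Derived local;
  local.data.resize(1024);
  auto out = std::make_shared<Derived>(std::move(local));
  assert(out->data.size() == 1024);
  return 0;
}
```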
@@ -46,6 +46,7 @@ limitations under the License. */
  * In the future, the necessary components will be moved to this library,
  * or the corresponding components will be re-implemented.
  */
 #include "paddle/fluid/memory/memory.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/stream/cuda_stream.h"
...
@@ -142,7 +143,12 @@ PlaceType Tensor::place() const {
 }
 
 paddle::platform::Place Tensor::inner_place() const {
-  return ConvertExtPlaceToInnerPlace(place());
+  PADDLE_ENFORCE_NOT_NULL(
+      impl_,
+      phi::errors::PermissionDenied(
+          "Null pointer error, the impl_ of Tensor should not be "
+          "Null when calling Tensor::inner_place()."));
+  return impl_->place();
 }
 
 bool Tensor::is_cpu() const {
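`inner_place()` previously round-tripped through the public `PlaceType` enum (`place()` followed by `ConvertExtPlaceToInnerPlace`); now it reads the place directly off `impl_`, with an explicit null check so an empty Tensor fails with a clear message instead of crashing. A toy sketch of that guarded-delegation shape (hypothetical types, and a plain exception standing in for `PADDLE_ENFORCE_NOT_NULL`):

```cpp
#include <memory>
#include <stdexcept>

// Hypothetical stand-ins for paddle::platform::Place and phi::TensorBase.
struct Place { int device_id = 0; };
struct TensorBase { Place place; };

class Tensor {
 public:
  Place inner_place() const {
    // Fail loudly on an empty tensor instead of dereferencing null.
    if (!impl_) {
      throw std::runtime_error(
          "impl_ of Tensor should not be null when calling inner_place()");
    }
    return impl_->place;  // direct read, no enum conversion round trip
  }
  std::shared_ptr<TensorBase> impl_;
};

int main() {
  Tensor t;
  t.impl_ = std::make_shared<TensorBase>();
  return t.inner_place().device_id;
}
```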
@@ -286,12 +292,16 @@ Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const {
   }
 }
 
-std::shared_ptr<phi::TensorBase> Tensor::impl() const { return impl_; }
+const std::shared_ptr<phi::TensorBase> &Tensor::impl() const { return impl_; }
 
 void Tensor::set_impl(const std::shared_ptr<phi::TensorBase> &impl) {
   impl_ = impl;
 }
 
+void Tensor::set_impl(std::shared_ptr<phi::TensorBase> &&impl) {
+  impl_ = std::move(impl);
+}
+
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 gpuStream_t Tensor::stream() const {
   return platform::stream::get_current_stream(-1)->raw_stream();
...
@@ -197,8 +197,16 @@ class Kernel {
 
   const KernelArgsDef& args_def() const { return args_def_; }
 
+  const TensorArgDef& InputAt(size_t idx) const {
+    return args_def_.input_defs().at(idx);
+  }
+
   TensorArgDef& InputAt(size_t idx) { return args_def_.input_defs().at(idx); }
 
+  const TensorArgDef& OutputAt(size_t idx) const {
+    return args_def_.output_defs().at(idx);
+  }
+
   TensorArgDef& OutputAt(size_t idx) { return args_def_.output_defs().at(idx); }
 
   bool IsValid() { return fn_ != nullptr; }
...
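The new const overloads let code that only holds a `const Kernel&` (such as the `const auto& kernel` binding the codegen hunk below introduces) inspect argument definitions without a copy or a `const_cast`. A small sketch of the const-overload pattern with toy types:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// Toy stand-ins for phi's TensorArgDef / Kernel.
struct TensorArgDef { int backend = 0; };

class Kernel {
 public:
  // Const overload: callable through a const Kernel&, read-only access.
  const TensorArgDef& InputAt(std::size_t idx) const {
    return input_defs_.at(idx);
  }
  // Non-const overload: allows in-place mutation during kernel setup.
  TensorArgDef& InputAt(std::size_t idx) { return input_defs_.at(idx); }

 private:
  std::vector<TensorArgDef> input_defs_{TensorArgDef{}};
};

void Inspect(const Kernel& kernel) {
  // Without the const overload, this call would not compile.
  std::printf("backend: %d\n", kernel.InputAt(0).backend);
}

int main() {
  Kernel k;
  k.InputAt(0).backend = 1;  // non-const overload, mutation allowed
  Inspect(k);                // const overload, read-only
  return 0;
}
```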
@@ -698,7 +698,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
         self.outputs['types'], 'SetKernelOutput', code_indent, inplace_flag)
     api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '')
     return f"""
-{code_indent}  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+{code_indent}  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
 {code_indent}      "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}});
 {code_indent}  VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
 {code_indent}  VLOG(6) << "{self.api} API kernel: " << kernel;
...
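Binding the selected kernel with `auto` copies the whole `Kernel` object (including its `KernelArgsDef` tables) on every generated API call; `const auto&` binds to the object owned by the factory instead, assuming, as the change implies, that `SelectKernelOrThrowError` hands back a reference to the registered kernel. A toy demonstration of the difference:

```cpp
#include <cstdio>

// Counts copies to show what `auto kernel = ...` used to cost per call.
// Toy type; not the real phi::Kernel or KernelFactory.
struct Kernel {
  Kernel() = default;
  Kernel(const Kernel&) { ++copies; }
  static int copies;
};
int Kernel::copies = 0;

Kernel g_registered;  // stands in for the kernel stored in the factory

const Kernel& SelectKernel() { return g_registered; }

int main() {
  auto by_value = SelectKernel();       // copies the whole Kernel object
  const auto& by_ref = SelectKernel();  // just an alias, no copy
  (void)by_value;
  (void)by_ref;
  std::printf("copies: %d\n", Kernel::copies);  // prints 1
  return 0;
}
```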