未验证 提交 add304ed 编写于 作者: Z zyfncg 提交者: GitHub

Optimize the performance of C++ API (#40640)

* Optimize performance

* optimize c++ api performance

* remove unused code

* fix paddle throw

* update format
上级 c1931beb
......@@ -324,7 +324,7 @@ class PADDLE_API Tensor final {
*
* @return std::shared_ptr<phi::TensorBase>
*/
std::shared_ptr<phi::TensorBase> impl() const;
const std::shared_ptr<phi::TensorBase>& impl() const;
/**
* @brief Set the implemention of current Tensor.
......@@ -333,6 +333,13 @@ class PADDLE_API Tensor final {
*/
void set_impl(const std::shared_ptr<phi::TensorBase>& impl);
/**
* @brief Set the implemention of current Tensor.
*
* @param impl
*/
void set_impl(std::shared_ptr<phi::TensorBase>&& impl);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
/**
* @brief Get the stream where the tensor is currently located
......
......@@ -95,12 +95,8 @@ paddle::optional<phi::MetaTensor> MakeMetaTensor(
/* ------------------ for output ----------------------- */
phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) {
if (!out->initialized()) {
auto dense_tensor = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
phi::DenseTensorMeta());
out->set_impl(dense_tensor);
return dense_tensor.get();
if (out->impl() == nullptr) {
out->set_impl(std::make_shared<phi::DenseTensor>());
}
return static_cast<phi::DenseTensor*>(out->impl().get());
}
......@@ -111,9 +107,7 @@ std::vector<phi::DenseTensor*> SetKernelOutput(size_t out_size,
out->reserve(out_size);
std::vector<phi::DenseTensor*> results(out_size);
for (size_t i = 0; i < out_size; ++i) {
auto tensor_ptr = std::make_shared<phi::DenseTensor>(
phi::make_intrusive<SharedStorage>(phi::TransToPhiPlace(backend)),
phi::DenseTensorMeta());
auto tensor_ptr = std::make_shared<phi::DenseTensor>();
results[i] = tensor_ptr.get();
out->emplace_back();
out->back().set_impl(tensor_ptr);
......
......@@ -167,10 +167,7 @@ phi::DenseTensor TransformData(const phi::DenseTensor& tensor,
if (NeedTransformPlace(
out.place(), target_args_def.backend, transform_flag)) {
phi::DenseTensor result(
phi::make_intrusive<paddle::experimental::SharedStorage>(
phi::TransToPhiPlace(target_args_def.backend)),
{out.dtype(), out.dims(), out.layout()});
phi::DenseTensor result;
framework::TransDataDevice(
out, phi::TransToPhiPlace(target_args_def.backend), &result);
out = result;
......@@ -190,14 +187,14 @@ std::shared_ptr<phi::DenseTensor> PrepareData(
tensor_in->dtype(), target_args_def.dtype, transform_flag) &&
!NeedTransformLayout(
tensor_in->layout(), target_args_def.layout, transform_flag))) {
return std::dynamic_pointer_cast<phi::DenseTensor>(tensor_in);
return std::static_pointer_cast<phi::DenseTensor>(tensor_in);
}
phi::DenseTensor out =
TransformData(*(static_cast<phi::DenseTensor*>(tensor_in.get())),
target_args_def,
transform_flag);
return std::make_shared<phi::DenseTensor>(out);
return std::make_shared<phi::DenseTensor>(std::move(out));
}
std::shared_ptr<phi::DenseTensor> PrepareData(
......
......@@ -46,6 +46,7 @@ limitations under the License. */
* In the future, the necessary components will be moved to the this library,
* or the corresponding components will be re-implemented.
*/
#include "paddle/fluid/memory/memory.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/stream/cuda_stream.h"
......@@ -142,7 +143,12 @@ PlaceType Tensor::place() const {
}
paddle::platform::Place Tensor::inner_place() const {
return ConvertExtPlaceToInnerPlace(place());
PADDLE_ENFORCE_NOT_NULL(
impl_,
phi::errors::PermissionDenied(
"Null pointer error, the impl_ of Tensor should not be "
"Null when calling Tensor::inner_place()."));
return impl_->place();
}
bool Tensor::is_cpu() const {
......@@ -286,12 +292,16 @@ Tensor Tensor::slice(int64_t begin_idx, int64_t end_idx) const {
}
}
std::shared_ptr<phi::TensorBase> Tensor::impl() const { return impl_; }
// Returns a const reference to the underlying TensorBase implementation.
// Returning by reference (instead of by value, as before this change) avoids
// copying the shared_ptr — and its atomic refcount increment — on every call.
const std::shared_ptr<phi::TensorBase> &Tensor::impl() const { return impl_; }
// Replaces the current implementation with a copy of `impl`.
// Copying the shared_ptr bumps its reference count; callers that can give up
// their pointer should prefer the rvalue overload to avoid that cost.
void Tensor::set_impl(const std::shared_ptr<phi::TensorBase> &impl) {
  impl_ = impl;
}
// Move overload: takes ownership of `impl` without touching the shared_ptr
// reference count. After the call the caller's pointer is left empty.
void Tensor::set_impl(std::shared_ptr<phi::TensorBase> &&impl) {
  impl_ = std::move(impl);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
gpuStream_t Tensor::stream() const {
return platform::stream::get_current_stream(-1)->raw_stream();
......
......@@ -197,8 +197,16 @@ class Kernel {
// Read-only access to the complete argument definition of this kernel.
const KernelArgsDef& args_def() const { return args_def_; }
// Definition of the input tensor argument at position `idx`.
// Uses .at(), so an out-of-range index throws std::out_of_range.
const TensorArgDef& InputAt(size_t idx) const {
  return args_def_.input_defs().at(idx);
}
// Mutable counterpart of InputAt(idx) const; same out-of-range behavior.
TensorArgDef& InputAt(size_t idx) { return args_def_.input_defs().at(idx); }
// Definition of the output tensor argument at position `idx`.
// Uses .at(), so an out-of-range index throws std::out_of_range.
const TensorArgDef& OutputAt(size_t idx) const {
  return args_def_.output_defs().at(idx);
}
// Mutable counterpart of OutputAt(idx) const; same out-of-range behavior.
TensorArgDef& OutputAt(size_t idx) { return args_def_.output_defs().at(idx); }
bool IsValid() { return fn_ != nullptr; }
......
......@@ -698,7 +698,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
self.outputs['types'], 'SetKernelOutput', code_indent, inplace_flag)
api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '')
return f"""
{code_indent} auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{code_indent} const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
{code_indent} "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}});
{code_indent} VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]";
{code_indent} VLOG(6) << "{self.api} API kernel: " << kernel;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册