From d2aaa7512b527d376b715eb097918dce336b0d87 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 22 Mar 2022 13:15:00 +0800 Subject: [PATCH] Optimize performance of C++ API (part2) (#40729) * optimize performance of C++ API * remove stop_data_transform flag temporarily --- paddle/phi/api/lib/backend_set.h | 2 +- paddle/phi/api/lib/data_transform.cc | 17 ++++++++--------- paddle/phi/api/lib/data_type_set.h | 2 +- paddle/phi/api/lib/kernel_dispatch.cc | 13 +++++++++++-- paddle/phi/api/lib/kernel_dispatch.h | 23 +++++++++++++---------- paddle/phi/core/compat/convert_utils.cc | 9 +++++---- 6 files changed, 39 insertions(+), 27 deletions(-) diff --git a/paddle/phi/api/lib/backend_set.h b/paddle/phi/api/lib/backend_set.h index 88f7b086715..2aa4f969221 100644 --- a/paddle/phi/api/lib/backend_set.h +++ b/paddle/phi/api/lib/backend_set.h @@ -35,7 +35,7 @@ class BackendSet final { : bitset_(b == Backend::UNDEFINED ? 0 : 1ULL << (static_cast(b) - 1)) {} - uint64_t bitset() const { return bitset_; } + inline uint64_t bitset() const { return bitset_; } bool inline Has(Backend b) const { PD_CHECK(b != Backend::UNDEFINED, "Backend argument can't be UNDEFINED."); diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 8bf5f3b481a..7d886e50dbc 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -39,7 +39,7 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input, const TransformFlag& transform_flag) { bool ret = transform_flag.need_trans_backend() && target != Backend::ALL_BACKEND && - !platform::is_same_place(input, phi::TransToPhiPlace(target)); + phi::TransToPhiBackend(input) != target; return ret; } @@ -180,21 +180,20 @@ std::shared_ptr PrepareData( const phi::TensorArgDef& target_args_def, const TransformFlag& transform_flag) { const auto& tensor_in = input.impl(); - VLOG(6) << tensor_in->dtype() << "\t" << target_args_def.dtype; - if (!transform_flag.NeedTransform() || 
!tensor_in->initialized() || + phi::DenseTensor& dense_tensor = + *static_cast(tensor_in.get()); + if (!transform_flag.NeedTransform() || !dense_tensor.initialized() || (!NeedTransformPlace( - tensor_in->place(), target_args_def.backend, transform_flag) && + dense_tensor.place(), target_args_def.backend, transform_flag) && !NeedTransformDataType( - tensor_in->dtype(), target_args_def.dtype, transform_flag) && + dense_tensor.dtype(), target_args_def.dtype, transform_flag) && !NeedTransformLayout( - tensor_in->layout(), target_args_def.layout, transform_flag))) { + dense_tensor.layout(), target_args_def.layout, transform_flag))) { return std::static_pointer_cast(tensor_in); } phi::DenseTensor out = - TransformData(*(static_cast(tensor_in.get())), - target_args_def, - transform_flag); + TransformData(dense_tensor, target_args_def, transform_flag); return std::make_shared(std::move(out)); } diff --git a/paddle/phi/api/lib/data_type_set.h b/paddle/phi/api/lib/data_type_set.h index ecc1b37c3a6..4b5e6bde247 100644 --- a/paddle/phi/api/lib/data_type_set.h +++ b/paddle/phi/api/lib/data_type_set.h @@ -30,7 +30,7 @@ class DataTypeSet final { ? 0 : 1ULL << (static_cast(dtype) - 1)) {} - uint64_t bitset() const { return bitset_; } + inline uint64_t bitset() const { return bitset_; } bool inline Has(DataType dtype) const { PD_CHECK(dtype != DataType::UNDEFINED, diff --git a/paddle/phi/api/lib/kernel_dispatch.cc b/paddle/phi/api/lib/kernel_dispatch.cc index 8fcb35550cc..c2f7a7981f0 100644 --- a/paddle/phi/api/lib/kernel_dispatch.cc +++ b/paddle/phi/api/lib/kernel_dispatch.cc @@ -16,13 +16,16 @@ limitations under the License. 
*/ #include "paddle/phi/api/include/context_pool.h" #include "paddle/phi/core/compat/convert_utils.h" +#ifdef _MSC_VER +#include +#endif namespace paddle { namespace experimental { namespace detail { -BackendSet GetTensorBackendSet(const Tensor& t) { - BackendSet backend_set(phi::TransToPhiBackend(t.inner_place())); +BackendSet GetTensorBackendSet(const phi::TensorBase& t) { + BackendSet backend_set(phi::TransToPhiBackend(t.place())); switch (t.layout()) { case DataLayout::MKLDNN: backend_set = backend_set | BackendSet(Backend::MKLDNN); @@ -35,6 +38,11 @@ BackendSet GetTensorBackendSet(const Tensor& t) { } std::size_t CountLeadingZeros(uint64_t val) { +#if defined(__clang__) || defined(__GNUC__) + return __builtin_clzl(val); +#elif defined(_MSC_VER) + return __lzcnt64(val); +#else if (val == 0) { return 64; } @@ -48,6 +56,7 @@ std::size_t CountLeadingZeros(uint64_t val) { } } return zero_bits; +#endif } } // namespace detail diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h index a2b5dcc4860..25b74e7fe31 100644 --- a/paddle/phi/api/lib/kernel_dispatch.h +++ b/paddle/phi/api/lib/kernel_dispatch.h @@ -33,7 +33,7 @@ namespace paddle { namespace experimental { namespace detail { -BackendSet GetTensorBackendSet(const Tensor& t); +BackendSet GetTensorBackendSet(const phi::TensorBase& t); std::size_t CountLeadingZeros(uint64_t val); } // namespace detail @@ -93,11 +93,13 @@ struct KernelKeyParser : ArgsIterator { // TODO(chenweihang): deal with multiple diff input Tensors // TODO(chenweihang): add global device guard method to set backend void operator()(const Tensor& x) { - key_set.backend_set = key_set.backend_set | detail::GetTensorBackendSet(x); - // TODO(chenweihang): selecte multi layout and dtype - key_set.layout = x.layout(); - key_set.dtype = x.type(); - dtype_set = dtype_set | DataTypeSet(x.dtype()); + const phi::TensorBase& tensor = *x.impl(); + key_set.backend_set = + key_set.backend_set | 
detail::GetTensorBackendSet(tensor); + // TODO(chenweihang): select multi layout and dtype + key_set.layout = tensor.layout(); + key_set.dtype = tensor.dtype(); + dtype_set = dtype_set | DataTypeSet(key_set.dtype); auto promote_result = PromoteTypes(dtype_set); if (promote_result != DataType::UNDEFINED) { key_set.dtype = promote_result; @@ -105,11 +107,12 @@ struct KernelKeyParser : ArgsIterator { } void operator()(const std::vector& x) { + const phi::TensorBase& tensor = *x.at(0).impl(); key_set.backend_set = - key_set.backend_set | detail::GetTensorBackendSet(x[0]); - // TODO(chenweihang): selecte multi layout and dtype - key_set.layout = x[0].layout(); - key_set.dtype = x[0].type(); + key_set.backend_set | detail::GetTensorBackendSet(tensor); + // TODO(chenweihang): select multi layout and dtype + key_set.layout = tensor.layout(); + key_set.dtype = tensor.dtype(); } // skip other type args, these args don't used in kernel selection diff --git a/paddle/phi/core/compat/convert_utils.cc b/paddle/phi/core/compat/convert_utils.cc index 67245f1da5a..667cee10675 100644 --- a/paddle/phi/core/compat/convert_utils.cc +++ b/paddle/phi/core/compat/convert_utils.cc @@ -26,13 +26,14 @@ limitations under the License. */ namespace phi { Backend TransToPhiBackend(const phi::Place& place) { - if (place.GetType() == phi::AllocationType::CPU) { + auto allocation_type = place.GetType(); + if (allocation_type == phi::AllocationType::CPU) { return Backend::CPU; - } else if (place.GetType() == phi::AllocationType::GPU) { + } else if (allocation_type == phi::AllocationType::GPU) { return Backend::GPU; - } else if (place.GetType() == phi::AllocationType::XPU) { + } else if (allocation_type == phi::AllocationType::XPU) { return Backend::XPU; - } else if (place.GetType() == phi::AllocationType::CUSTOM) { + } else if (allocation_type == phi::AllocationType::CUSTOM) { return static_cast( static_cast(Backend::NUM_BACKENDS) + GetOrRegisterGlobalDeviceTypeId(place.GetDeviceType())); -- GitLab