diff --git a/paddle/phi/api/lib/backend_set.h b/paddle/phi/api/lib/backend_set.h index 88f7b086715d6535be1d5b30e02497c89026fb6f..2aa4f969221d9c0f8936e7fc7c3a41cc04a63454 100644 --- a/paddle/phi/api/lib/backend_set.h +++ b/paddle/phi/api/lib/backend_set.h @@ -35,7 +35,7 @@ class BackendSet final { : bitset_(b == Backend::UNDEFINED ? 0 : 1ULL << (static_cast<uint8_t>(b) - 1)) {} - uint64_t bitset() const { return bitset_; } + inline uint64_t bitset() const { return bitset_; } bool inline Has(Backend b) const { PD_CHECK(b != Backend::UNDEFINED, "Backend argument can't be UNDEFINED."); diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 8bf5f3b481a0e041b439ffd99a8ac017f4aae50e..7d886e50dbc23ef20557956ae1ae9503fbefbb45 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -39,7 +39,7 @@ inline bool NeedTransformPlace(const paddle::platform::Place& input, const TransformFlag& transform_flag) { bool ret = transform_flag.need_trans_backend() && target != Backend::ALL_BACKEND && - !platform::is_same_place(input, phi::TransToPhiPlace(target)); + phi::TransToPhiBackend(input) != target; return ret; } @@ -180,21 +180,20 @@ std::shared_ptr<phi::DenseTensor> PrepareData( const phi::TensorArgDef& target_args_def, const TransformFlag& transform_flag) { const auto& tensor_in = input.impl(); - VLOG(6) << tensor_in->dtype() << "\t" << target_args_def.dtype; - if (!transform_flag.NeedTransform() || !tensor_in->initialized() || + phi::DenseTensor& dense_tensor = + *static_cast<phi::DenseTensor*>(tensor_in.get()); + if (!transform_flag.NeedTransform() || !dense_tensor.initialized() || (!NeedTransformPlace( - tensor_in->place(), target_args_def.backend, transform_flag) && + dense_tensor.place(), target_args_def.backend, transform_flag) && !NeedTransformDataType( - tensor_in->dtype(), target_args_def.dtype, transform_flag) && + dense_tensor.dtype(), target_args_def.dtype, transform_flag) && !NeedTransformLayout( - tensor_in->layout(), 
target_args_def.layout, transform_flag))) { + dense_tensor.layout(), target_args_def.layout, transform_flag))) { return std::static_pointer_cast<phi::DenseTensor>(tensor_in); } phi::DenseTensor out = - TransformData(*(static_cast<phi::DenseTensor*>(tensor_in.get())), - target_args_def, - transform_flag); + TransformData(dense_tensor, target_args_def, transform_flag); return std::make_shared<phi::DenseTensor>(std::move(out)); } diff --git a/paddle/phi/api/lib/data_type_set.h b/paddle/phi/api/lib/data_type_set.h index ecc1b37c3a6af0d55f1cf71e860fd87657ed759b..4b5e6bde247002ca96a964f8739ea06a2036d580 100644 --- a/paddle/phi/api/lib/data_type_set.h +++ b/paddle/phi/api/lib/data_type_set.h @@ -30,7 +30,7 @@ class DataTypeSet final { ? 0 : 1ULL << (static_cast<uint8_t>(dtype) - 1)) {} - uint64_t bitset() const { return bitset_; } + inline uint64_t bitset() const { return bitset_; } bool inline Has(DataType dtype) const { PD_CHECK(dtype != DataType::UNDEFINED, diff --git a/paddle/phi/api/lib/kernel_dispatch.cc b/paddle/phi/api/lib/kernel_dispatch.cc index 8fcb35550cce6a142a316ae54fa53ad7b3445862..c2f7a7981f001119608c276a643a68199d0df0c4 100644 --- a/paddle/phi/api/lib/kernel_dispatch.cc +++ b/paddle/phi/api/lib/kernel_dispatch.cc @@ -16,13 +16,16 @@ limitations under the License. 
*/ #include "paddle/phi/api/include/context_pool.h" #include "paddle/phi/core/compat/convert_utils.h" +#ifdef _MSC_VER +#include <intrin.h> +#endif namespace paddle { namespace experimental { namespace detail { -BackendSet GetTensorBackendSet(const Tensor& t) { - BackendSet backend_set(phi::TransToPhiBackend(t.inner_place())); +BackendSet GetTensorBackendSet(const phi::TensorBase& t) { + BackendSet backend_set(phi::TransToPhiBackend(t.place())); switch (t.layout()) { case DataLayout::MKLDNN: backend_set = backend_set | BackendSet(Backend::MKLDNN); @@ -35,6 +38,11 @@ BackendSet GetTensorBackendSet(const Tensor& t) { } std::size_t CountLeadingZeros(uint64_t val) { +#if defined(__clang__) || defined(__GNUC__) + return __builtin_clzl(val); +#elif defined(_MSC_VER) + return __lzcnt64(val); +#else if (val == 0) { return 64; } @@ -48,6 +56,7 @@ std::size_t CountLeadingZeros(uint64_t val) { } } return zero_bits; +#endif } } // namespace detail diff --git a/paddle/phi/api/lib/kernel_dispatch.h b/paddle/phi/api/lib/kernel_dispatch.h index a2b5dcc4860dd2843c3f3ae892946e930b935665..25b74e7fe31b955a8b25529871c503179b302629 100644 --- a/paddle/phi/api/lib/kernel_dispatch.h +++ b/paddle/phi/api/lib/kernel_dispatch.h @@ -33,7 +33,7 @@ namespace paddle { namespace experimental { namespace detail { -BackendSet GetTensorBackendSet(const Tensor& t); +BackendSet GetTensorBackendSet(const phi::TensorBase& t); std::size_t CountLeadingZeros(uint64_t val); } // namespace detail @@ -93,11 +93,13 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> { // TODO(chenweihang): deal with multiple diff input Tensors // TODO(chenweihang): add global device guard method to set backend void operator()(const Tensor& x) { - key_set.backend_set = key_set.backend_set | detail::GetTensorBackendSet(x); - // TODO(chenweihang): selecte multi layout and dtype - key_set.layout = x.layout(); - key_set.dtype = x.type(); - dtype_set = dtype_set | DataTypeSet(x.dtype()); + const phi::TensorBase& tensor = *x.impl(); + key_set.backend_set 
= + key_set.backend_set | detail::GetTensorBackendSet(tensor); + // TODO(chenweihang): select multi layout and dtype + key_set.layout = tensor.layout(); + key_set.dtype = tensor.dtype(); + dtype_set = dtype_set | DataTypeSet(key_set.dtype); auto promote_result = PromoteTypes(dtype_set); if (promote_result != DataType::UNDEFINED) { key_set.dtype = promote_result; @@ -105,11 +107,12 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> { } void operator()(const std::vector<Tensor>& x) { + const phi::TensorBase& tensor = *x.at(0).impl(); key_set.backend_set = - key_set.backend_set | detail::GetTensorBackendSet(x[0]); - // TODO(chenweihang): selecte multi layout and dtype - key_set.layout = x[0].layout(); - key_set.dtype = x[0].type(); + key_set.backend_set | detail::GetTensorBackendSet(tensor); + // TODO(chenweihang): select multi layout and dtype + key_set.layout = tensor.layout(); + key_set.dtype = tensor.dtype(); } // skip other type args, these args don't used in kernel selection diff --git a/paddle/phi/core/compat/convert_utils.cc b/paddle/phi/core/compat/convert_utils.cc index 67245f1da5a6b65cdf81d2cd329d7d6b3828852e..667cee10675d8a6b756a6af701b65c63b3b359de 100644 --- a/paddle/phi/core/compat/convert_utils.cc +++ b/paddle/phi/core/compat/convert_utils.cc @@ -26,13 +26,14 @@ limitations under the License. 
*/ namespace phi { Backend TransToPhiBackend(const phi::Place& place) { - if (place.GetType() == phi::AllocationType::CPU) { + auto allocation_type = place.GetType(); + if (allocation_type == phi::AllocationType::CPU) { return Backend::CPU; - } else if (place.GetType() == phi::AllocationType::GPU) { + } else if (allocation_type == phi::AllocationType::GPU) { return Backend::GPU; - } else if (place.GetType() == phi::AllocationType::XPU) { + } else if (allocation_type == phi::AllocationType::XPU) { return Backend::XPU; - } else if (place.GetType() == phi::AllocationType::CUSTOM) { + } else if (allocation_type == phi::AllocationType::CUSTOM) { return static_cast<Backend>( static_cast<size_t>(Backend::NUM_BACKENDS) + GetOrRegisterGlobalDeviceTypeId(place.GetDeviceType()));