diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index 9acc8cc8db97bf1a93f0cc214527f011d08aa6a5..de67958d5fe91eb0be0a4e5151b59599c7084162 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -111,6 +111,7 @@ file( "gpu/*.cu.cc" "gpudnn/*.cu" "kps/*.cu" + "legacy/kps/*.cu" "selected_rows/gpu/*.cu" "sparse/gpu/*.cu" "strings/gpu/*.cu" @@ -152,6 +153,8 @@ if(WITH_MKLDNN) kernel_cc "*.cc" "cpu/*.cc" + "legacy/*.cc" + "legacy/cpu/*.cc" "selected_rows/*.cc" "selected_rows/cpu/*.cc" "sparse/*.cc" @@ -168,6 +171,8 @@ else() kernel_cc "*.cc" "cpu/*.cc" + "legacy/*.cc" + "legacy/cpu/*.cc" "selected_rows/*.cc" "selected_rows/cpu/*.cc" "sparse/*.cc" @@ -178,7 +183,8 @@ else() "fusion/cpu/*.cc") endif() -file(GLOB kernel_xpu "xpu/*.cc" "selected_rows/xpu/*.cc" "fusion/xpu/*.cc") +file(GLOB kernel_xpu "xpu/*.cc" "legacy/xpu/*.cc" "selected_rows/xpu/*.cc" + "fusion/xpu/*.cc") if(WITH_MKLDNN) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} get_kerneltype_forvar_utils) @@ -201,6 +207,8 @@ elseif(WITH_XPU) if(WITH_XPU_KP) file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/ DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps/) + file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/legacy/kps/ + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps/) file(GLOB kernel_xpu_kps "${CMAKE_CURRENT_BINARY_DIR}/kps/*.cu") foreach(kernel ${kernel_xpu_kps}) get_filename_component(name ${kernel} NAME_WE) @@ -212,6 +220,8 @@ elseif(WITH_XPU) RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc" "cpu/*.cc" + "legacy/*.cc" + "legacy/cpu/*.cc" "selected_rows/*.cc" "selected_rows/cpu/*.cc" "sparse/*.cc" diff --git a/paddle/phi/kernels/cpu/elementwise_kernel.cc b/paddle/phi/kernels/cpu/elementwise_kernel.cc index 11aac8bbfe3ad37749d1098d81a977db6aaffd2e..9b564679b354e38b157d6b1924aeda4a55d2e6e4 100644 --- a/paddle/phi/kernels/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_kernel.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/phi/kernels/legacy/elementwise_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" @@ -22,84 +23,48 @@ namespace phi { template -void MaximumRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::MaximumFunctor(), out); +void MaximumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MaximumRawKernel(dev_ctx, x, y, axis, out); } template -void MinimumRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::MinimumFunctor(), out); +void MinimumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MinimumRawKernel(dev_ctx, x, y, axis, out); } template -void RemainderRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::RemainderFunctor(), out); - } else { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::InverseRemainderFunctor(), out); - } +void RemainderKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + RemainderRawKernel(dev_ctx, x, y, axis, out); } template -void FloorDivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::FloorDivideFunctor(), out); - } else { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::InverseFloorDivideFunctor(), out); - } +void FloorDivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + FloorDivideRawKernel(dev_ctx, x, y, axis, out); } template -void ElementwisePowRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::ElementwisePowFunctor(), out); - } else { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::ElementwiseInversePowFunctor(), out); - } +void ElementwisePowKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + ElementwisePowRawKernel(dev_ctx, x, y, axis, out); } template @@ -127,42 +92,38 @@ PD_REGISTER_KERNEL( PD_REGISTER_KERNEL( fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {} -PD_REGISTER_KERNEL(maximum_raw, +PD_REGISTER_KERNEL(maximum, CPU, ALL_LAYOUT, - phi::MaximumRawKernel, + phi::MaximumKernel, float, double, int, int64_t, phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(minimum_raw, +PD_REGISTER_KERNEL(minimum, CPU, ALL_LAYOUT, - phi::MinimumRawKernel, + phi::MinimumKernel, float, double, int, int64_t, phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(remainder_raw, +PD_REGISTER_KERNEL(remainder, CPU, ALL_LAYOUT, - phi::RemainderRawKernel, + phi::RemainderKernel, float, double, int, int64_t) {} -PD_REGISTER_KERNEL(floor_divide_raw, - CPU, - ALL_LAYOUT, - phi::FloorDivideRawKernel, - int, - int64_t) {} -PD_REGISTER_KERNEL(elementwise_pow_raw, +PD_REGISTER_KERNEL( + floor_divide, CPU, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {} +PD_REGISTER_KERNEL(elementwise_pow, CPU, ALL_LAYOUT, - phi::ElementwisePowRawKernel, + phi::ElementwisePowKernel, float, double, int, diff --git a/paddle/phi/kernels/elementwise_kernel.cc b/paddle/phi/kernels/elementwise_kernel.cc index 98d76c2d944f3d4219c3493b3ed4c06405d58359..0a2a15abd3d75e90cb28622751c3f280cb0d1c15 100644 --- a/paddle/phi/kernels/elementwise_kernel.cc +++ b/paddle/phi/kernels/elementwise_kernel.cc @@ -23,51 +23,6 @@ namespace phi { -template -void MaximumKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - MaximumRawKernel(dev_ctx, x, y, axis, out); -} - -template -void MinimumKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - MinimumRawKernel(dev_ctx, x, y, axis, out); -} - -template -void RemainderKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - RemainderRawKernel(dev_ctx, x, y, axis, out); -} - -template -void FloorDivideKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - FloorDivideRawKernel(dev_ctx, x, y, axis, out); -} - -template -void ElementwisePowKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - ElementwisePowRawKernel(dev_ctx, x, y, axis, out); -} - template void DivideKernel(const Context& dev_ctx, const DenseTensor& x, @@ -105,44 +60,6 @@ void SubtractKernel(const Context& dev_ctx, using complex64 = ::phi::dtype::complex; using complex128 = ::phi::dtype::complex; -PD_REGISTER_KERNEL(maximum, - CPU, - ALL_LAYOUT, - phi::MaximumKernel, - float, - double, - int, - int64_t, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(minimum, - CPU, - ALL_LAYOUT, - phi::MinimumKernel, - float, - double, - int, - int64_t, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(remainder, - CPU, - ALL_LAYOUT, - phi::RemainderKernel, - float, - double, - int, - int64_t) {} -PD_REGISTER_KERNEL( - floor_divide, CPU, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {} -PD_REGISTER_KERNEL(elementwise_pow, - CPU, - ALL_LAYOUT, - phi::ElementwisePowKernel, - float, - double, - int, - int64_t, - phi::dtype::bfloat16) {} - PD_REGISTER_KERNEL(subtract, CPU, ALL_LAYOUT, @@ -192,52 +109,6 @@ PD_REGISTER_KERNEL(divide, complex64, complex128) {} -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - -PD_REGISTER_KERNEL(maximum, - KPS, - ALL_LAYOUT, - phi::MaximumKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(minimum, - KPS, - ALL_LAYOUT, - phi::MinimumKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(remainder, - GPU, - ALL_LAYOUT, - phi::RemainderKernel, - float, - double, - int, - int64_t, - phi::dtype::float16) {} -PD_REGISTER_KERNEL( - floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {} -PD_REGISTER_KERNEL(elementwise_pow, - KPS, - ALL_LAYOUT, - phi::ElementwisePowKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} - -#endif - #if defined(PADDLE_WITH_XPU_KP) && defined(PADDLE_WITH_XPU) PD_REGISTER_KERNEL(subtract, KPS, ALL_LAYOUT, phi::SubtractKernel, float) {} PD_REGISTER_KERNEL(add, KPS, ALL_LAYOUT, phi::AddKernel, float) {} @@ -329,29 +200,3 @@ PD_REGISTER_KERNEL(subtract, phi::dtype::float16, int64_t) {} #endif -#if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP) -PD_REGISTER_KERNEL(floor_divide, - XPU, - ALL_LAYOUT, - phi::FloorDivideKernel, - float, - phi::dtype::float16) {} -PD_REGISTER_KERNEL( - maximum, XPU, ALL_LAYOUT, phi::MaximumKernel, float, phi::dtype::float16) {} -PD_REGISTER_KERNEL( - minimum, XPU, ALL_LAYOUT, phi::MinimumKernel, float, phi::dtype::float16) {} -PD_REGISTER_KERNEL(remainder, - XPU, - ALL_LAYOUT, - phi::RemainderKernel, - float, - phi::dtype::float16, - int32_t, - int64_t) {} -PD_REGISTER_KERNEL(elementwise_pow, - XPU, - ALL_LAYOUT, - phi::ElementwisePowKernel, - float, - phi::dtype::float16) {} -#endif diff --git a/paddle/phi/kernels/elementwise_kernel.h b/paddle/phi/kernels/elementwise_kernel.h index 3bc4163d59e71f2c61b0d684e660fdd12bbcf5fb..5c01639dd7cc2e359ade1311f625563f80b5e1d9 100644 --- a/paddle/phi/kernels/elementwise_kernel.h +++ b/paddle/phi/kernels/elementwise_kernel.h @@ -31,65 +31,30 @@ void FMinKernel(const Context& dev_ctx, const DenseTensor& y, DenseTensor* out); -template -void MaximumRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - template void MaximumKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out); -template -void MinimumRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - template void MinimumKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out); -template -void RemainderRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - template void RemainderKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out); -template -void FloorDivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - template void FloorDivideKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& y, DenseTensor* out); -template -void ElementwisePowRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - template void ElementwisePowKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu index 245137943d5710141b04d679505ff319624147d1..80a969c4fabb4ee0fdb123303b7321906594af1a 100644 --- a/paddle/phi/kernels/kps/elementwise_kernel.cu +++ b/paddle/phi/kernels/kps/elementwise_kernel.cu @@ -19,11 +19,10 @@ #endif #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" +#include "paddle/phi/kernels/legacy/elementwise_kernel.h" namespace phi { -// Create the definition of Maximum -DEFINE_CUDA_ELEMENTWISE_OP(Maximum) template void MaximumKernel(const Context& dev_ctx, const DenseTensor& x, @@ -33,8 +32,6 @@ void MaximumKernel(const Context& dev_ctx, MaximumRawKernel(dev_ctx, x, y, axis, out); } -// Create the definition of Minimum -DEFINE_CUDA_ELEMENTWISE_OP(Minimum) template void MinimumKernel(const Context& dev_ctx, const DenseTensor& x, @@ -43,10 +40,16 @@ void MinimumKernel(const Context& dev_ctx, int axis = -1; MinimumRawKernel(dev_ctx, x, y, axis, out); } -// Create the definition of Remainder -DEFINE_CUDA_ELEMENTWISE_OP(Remainder) -// Create the definition of FloorDivide -DEFINE_CUDA_ELEMENTWISE_OP(FloorDivide) + +template +void RemainderKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + RemainderRawKernel(dev_ctx, x, y, axis, out); +} + template void FloorDivideKernel(const Context& dev_ctx, const DenseTensor& x, @@ -73,8 +76,6 @@ void HeavisideKernel(const Context& dev_ctx, dev_ctx, inputs, &outputs, -1, funcs::ElementwiseHeavisideFunctor()); } -// Create the definition of Pow -DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow) template void ElementwisePowKernel(const Context& dev_ctx, const DenseTensor& x, @@ -86,101 +87,93 @@ void ElementwisePowKernel(const Context& dev_ctx, } // namespace phi -#ifdef PADDLE_WITH_XPU_KP -PD_REGISTER_KERNEL(maximum, KPS, ALL_LAYOUT, phi::MaximumKernel, float) {} -PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, phi::MaximumRawKernel, float) { -} -PD_REGISTER_KERNEL(minimum, KPS, ALL_LAYOUT, phi::MinimumKernel, float) {} -PD_REGISTER_KERNEL(minimum_raw, KPS, ALL_LAYOUT, phi::MinimumRawKernel, float) { -} -PD_REGISTER_KERNEL(floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int) { -} -PD_REGISTER_KERNEL( - floor_divide_raw, KPS, ALL_LAYOUT, phi::FloorDivideRawKernel, int) {} -PD_REGISTER_KERNEL( - elementwise_pow, KPS, ALL_LAYOUT, phi::ElementwisePowKernel, float) {} -PD_REGISTER_KERNEL( - elementwise_pow_raw, KPS, ALL_LAYOUT, phi::ElementwisePowRawKernel, float) { -} +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -#else -using float16 = phi::dtype::float16; -using bfloat16 = phi::dtype::bfloat16; -using complex64 = ::phi::dtype::complex; -using complex128 = ::phi::dtype::complex; - -PD_REGISTER_KERNEL(fmax, +PD_REGISTER_KERNEL(maximum, KPS, ALL_LAYOUT, - phi::FMaxKernel, + phi::MaximumKernel, float, double, int, - float16, - int64_t) {} - -PD_REGISTER_KERNEL(fmin, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(minimum, KPS, ALL_LAYOUT, - phi::FMinKernel, + phi::MinimumKernel, float, double, int, - float16, - int64_t) {} - -PD_REGISTER_KERNEL(maximum_raw, - KPS, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(remainder, + GPU, ALL_LAYOUT, - phi::MaximumRawKernel, + phi::RemainderKernel, float, double, int, int64_t, - float16, - bfloat16) {} -PD_REGISTER_KERNEL(minimum_raw, + phi::dtype::float16) {} +PD_REGISTER_KERNEL( + floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {} +PD_REGISTER_KERNEL(elementwise_pow, KPS, ALL_LAYOUT, - phi::MinimumRawKernel, + phi::ElementwisePowKernel, float, double, int, int64_t, - float16, - bfloat16) {} -PD_REGISTER_KERNEL(remainder_raw, + phi::dtype::float16, + phi::dtype::bfloat16) {} + +#endif + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL(maximum, KPS, ALL_LAYOUT, phi::MaximumKernel, float) {} +PD_REGISTER_KERNEL(minimum, KPS, ALL_LAYOUT, phi::MinimumKernel, float) {} +PD_REGISTER_KERNEL(floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int) { +} +PD_REGISTER_KERNEL( + elementwise_pow, KPS, ALL_LAYOUT, phi::ElementwisePowKernel, float) {} + +#else +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(fmax, KPS, ALL_LAYOUT, - phi::RemainderRawKernel, + phi::FMaxKernel, float, double, int, float16, int64_t) {} -PD_REGISTER_KERNEL(floor_divide_raw, - KPS, - ALL_LAYOUT, - phi::FloorDivideRawKernel, - int, - int64_t) {} -PD_REGISTER_KERNEL(heaviside, + +PD_REGISTER_KERNEL(fmin, KPS, ALL_LAYOUT, - phi::HeavisideKernel, + phi::FMinKernel, float, double, int, float16, int64_t) {} -PD_REGISTER_KERNEL(elementwise_pow_raw, + +PD_REGISTER_KERNEL(heaviside, KPS, ALL_LAYOUT, - phi::ElementwisePowRawKernel, + phi::HeavisideKernel, float, double, int, float16, - bfloat16, int64_t) {} #endif diff --git a/paddle/phi/kernels/legacy/cpu/elementwise_kernel.cc b/paddle/phi/kernels/legacy/cpu/elementwise_kernel.cc index 6d1f8701c3d3daac05a4413fda59fcb396689e6b..a976cb2a0093379f0a65b3d65a764d60afe3ee64 100644 --- a/paddle/phi/kernels/legacy/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/legacy/cpu/elementwise_kernel.cc @@ -143,4 +143,5 @@ PD_REGISTER_KERNEL(elementwise_pow_raw, float, double, int, - int64_t) {} + int64_t, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/legacy/elementwise_kernel.h b/paddle/phi/kernels/legacy/elementwise_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..b51704da7a6d61a955b3620b4c30a3f71217deeb --- /dev/null +++ b/paddle/phi/kernels/legacy/elementwise_kernel.h @@ -0,0 +1,56 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { + +template +void MaximumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MinimumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void RemainderRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void FloorDivideRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void ElementwisePowRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); +} // namespace phi diff --git a/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu b/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu index ec856ffa53b094367b45f2c6a0c838aec992dee8..95cf5d4333e8d8f251a5c490be57311ca9a0f3ff 100644 --- a/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu +++ b/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu @@ -172,5 +172,6 @@ PD_REGISTER_KERNEL(elementwise_pow_raw, double, int, float16, - int64_t) {} + int64_t, + bfloat16) {} #endif diff --git a/paddle/phi/kernels/xpu/elementwise_kernel.cc b/paddle/phi/kernels/xpu/elementwise_kernel.cc index f70f9e743a41147d9ff91cd9ea351aeadeee75af..386ad2e13ff0edffb6174f6d7e5e6e7eacc7a791 100644 --- a/paddle/phi/kernels/xpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/xpu/elementwise_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/legacy/elementwise_kernel.h" #include "paddle/phi/kernels/xpu/elementwise.h" #include "paddle/phi/backends/xpu/xpu_context.h" @@ -21,68 +22,37 @@ namespace phi { template -void FloorDivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - using XPUType = typename XPUTypeTrait::Type; - auto f = [](xpu::Context* ctx, - const XPUType* x, - const XPUType* y, - XPUType* z, - const std::vector& xshape, - const std::vector& yshape) { - return xpu::broadcast_floordiv(ctx, x, y, z, xshape, yshape); - }; - - XPUElementwise(dev_ctx, x, y, axis, out, f); +void FloorDivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + FloorDivideRawKernel(dev_ctx, x, y, axis, out); } template -void MaximumRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - using XPUType = typename XPUTypeTrait::Type; - auto f = [](xpu::Context* ctx, - const XPUType* x, - const XPUType* y, - XPUType* z, - const std::vector& xshape, - const std::vector& yshape) { - return xpu::broadcast_max(ctx, x, y, z, xshape, yshape); - }; - - XPUElementwise(dev_ctx, x, y, axis, out, f); +void MaximumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MaximumRawKernel(dev_ctx, x, y, axis, out); } template -void MinimumRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - using XPUType = typename XPUTypeTrait::Type; - auto f = [](xpu::Context* ctx, - const XPUType* x, - const XPUType* y, - XPUType* z, - const std::vector& xshape, - const std::vector& yshape) { - return xpu::broadcast_min(ctx, x, y, z, xshape, yshape); - }; - - XPUElementwise(dev_ctx, x, y, axis, out, f); +void MinimumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MinimumRawKernel(dev_ctx, x, y, axis, out); } template -void RemainderRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { +void RemainderKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { using XPUType = typename XPUTypeTrait::Type; auto f = [](xpu::Context* ctx, const XPUType* x, @@ -93,59 +63,41 @@ void RemainderRawKernel(const Context& dev_ctx, return xpu::broadcast_mod(ctx, x, y, z, xshape, yshape); }; - XPUElementwise(dev_ctx, x, y, axis, out, f); + XPUElementwise(dev_ctx, x, y, -1, out, f); } template -void ElementwisePowRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - using XPUType = typename XPUTypeTrait::Type; - auto f = [](xpu::Context* ctx, - const XPUType* x, - const XPUType* y, - XPUType* z, - const std::vector& xshape, - const std::vector& yshape) { - return xpu::broadcast_pow(ctx, x, y, z, xshape, yshape); - }; - - XPUElementwise(dev_ctx, x, y, axis, out, f); +void ElementwisePowKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + ElementwisePowRawKernel(dev_ctx, x, y, axis, out); } } // namespace phi -PD_REGISTER_KERNEL(floor_divide_raw, - XPU, - ALL_LAYOUT, - phi::FloorDivideRawKernel, - float, - phi::dtype::float16) {} -PD_REGISTER_KERNEL(maximum_raw, - XPU, - ALL_LAYOUT, - phi::MaximumRawKernel, - float, - phi::dtype::float16) {} -PD_REGISTER_KERNEL(minimum_raw, +PD_REGISTER_KERNEL(floor_divide, XPU, ALL_LAYOUT, - phi::MinimumRawKernel, + phi::FloorDivideKernel, float, phi::dtype::float16) {} -PD_REGISTER_KERNEL(remainder_raw, +PD_REGISTER_KERNEL( + maximum, XPU, ALL_LAYOUT, phi::MaximumKernel, float, phi::dtype::float16) {} +PD_REGISTER_KERNEL( + minimum, XPU, ALL_LAYOUT, phi::MinimumKernel, float, phi::dtype::float16) {} +PD_REGISTER_KERNEL(remainder, XPU, ALL_LAYOUT, - phi::RemainderRawKernel, + phi::RemainderKernel, float, phi::dtype::float16, int32_t, int64_t) {} -PD_REGISTER_KERNEL(elementwise_pow_raw, +PD_REGISTER_KERNEL(elementwise_pow, XPU, ALL_LAYOUT, - phi::ElementwisePowRawKernel, + phi::ElementwisePowKernel, float, phi::dtype::float16) {}