Unverified commit 7a72f7a2, authored by zhangyuqin1998, committed by GitHub

move_elementwise_raw (#53010)

* setup

* Update elementwise_kernel.cc

* Update elementwise_kernel.cc

* fix

* fix

* Update elementwise_kernel.cu

* fix

* Update elementwise_kernel.cc

* Update elementwise_kernel.cc

* Update elementwise_kernel.cc

* Update elementwise_kernel.cc

* Update elementwise_kernel.cc

* Update elementwise_kernel.cc
Parent 06ecc6d2
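The diff below moves the *_raw elementwise kernels into paddle/phi/kernels/legacy/ and keeps the public kernels as thin wrappers that forward to them with the default broadcast axis of -1. A minimal sketch of that wrapper pattern, using names taken from the diff (illustrative only, not the exact file contents):

#include "paddle/phi/kernels/legacy/elementwise_kernel.h"

namespace phi {

// Public kernel: no axis argument. It delegates to the legacy raw kernel,
// which still takes an explicit axis, passing the default axis of -1.
template <typename T, typename Context>
void MaximumKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
                   DenseTensor* out) {
  int axis = -1;
  MaximumRawKernel<T>(dev_ctx, x, y, axis, out);
}

}  // namespace phi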
......@@ -111,6 +111,7 @@ file(
"gpu/*.cu.cc"
"gpudnn/*.cu"
"kps/*.cu"
"legacy/kps/*.cu"
"selected_rows/gpu/*.cu"
"sparse/gpu/*.cu"
"strings/gpu/*.cu"
......@@ -152,6 +153,8 @@ if(WITH_MKLDNN)
kernel_cc
"*.cc"
"cpu/*.cc"
"legacy/*.cc"
"legacy/cpu/*.cc"
"selected_rows/*.cc"
"selected_rows/cpu/*.cc"
"sparse/*.cc"
......@@ -168,6 +171,8 @@ else()
kernel_cc
"*.cc"
"cpu/*.cc"
"legacy/*.cc"
"legacy/cpu/*.cc"
"selected_rows/*.cc"
"selected_rows/cpu/*.cc"
"sparse/*.cc"
......@@ -178,7 +183,8 @@ else()
"fusion/cpu/*.cc")
endif()
file(GLOB kernel_xpu "xpu/*.cc" "selected_rows/xpu/*.cc" "fusion/xpu/*.cc")
file(GLOB kernel_xpu "xpu/*.cc" "legacy/xpu/*.cc" "selected_rows/xpu/*.cc"
"fusion/xpu/*.cc")
if(WITH_MKLDNN)
set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} get_kerneltype_forvar_utils)
......@@ -201,6 +207,8 @@ elseif(WITH_XPU)
if(WITH_XPU_KP)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/kps/
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/legacy/kps/
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/kps/)
file(GLOB kernel_xpu_kps "${CMAKE_CURRENT_BINARY_DIR}/kps/*.cu")
foreach(kernel ${kernel_xpu_kps})
get_filename_component(name ${kernel} NAME_WE)
......@@ -212,6 +220,8 @@ elseif(WITH_XPU)
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*.cc"
"cpu/*.cc"
"legacy/*.cc"
"legacy/cpu/*.cc"
"selected_rows/*.cc"
"selected_rows/cpu/*.cc"
"sparse/*.cc"
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/legacy/elementwise_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
......@@ -22,84 +23,48 @@
namespace phi {
template <typename T, typename Context>
void MaximumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
funcs::ElementwiseCompute<funcs::MaximumFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::MaximumFunctor<T>(), out);
void MaximumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MaximumRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void MinimumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
funcs::ElementwiseCompute<funcs::MinimumFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::MinimumFunctor<T>(), out);
void MinimumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MinimumRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void RemainderRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
funcs::ElementwiseCompute<funcs::RemainderFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::RemainderFunctor<T>(), out);
} else {
funcs::ElementwiseCompute<funcs::InverseRemainderFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::InverseRemainderFunctor<T>(), out);
}
void RemainderKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
RemainderRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void FloorDivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
funcs::ElementwiseCompute<funcs::FloorDivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::FloorDivideFunctor<T>(), out);
} else {
funcs::ElementwiseCompute<funcs::InverseFloorDivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::InverseFloorDivideFunctor<T>(), out);
}
void FloorDivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
FloorDivideRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void ElementwisePowRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
funcs::ElementwiseCompute<funcs::ElementwisePowFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::ElementwisePowFunctor<T>(), out);
} else {
funcs::ElementwiseCompute<funcs::ElementwiseInversePowFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::ElementwiseInversePowFunctor<T>(), out);
}
void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
ElementwisePowRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
......@@ -127,42 +92,38 @@ PD_REGISTER_KERNEL(
PD_REGISTER_KERNEL(
fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(maximum_raw,
PD_REGISTER_KERNEL(maximum,
CPU,
ALL_LAYOUT,
phi::MaximumRawKernel,
phi::MaximumKernel,
float,
double,
int,
int64_t,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(minimum_raw,
PD_REGISTER_KERNEL(minimum,
CPU,
ALL_LAYOUT,
phi::MinimumRawKernel,
phi::MinimumKernel,
float,
double,
int,
int64_t,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(remainder_raw,
PD_REGISTER_KERNEL(remainder,
CPU,
ALL_LAYOUT,
phi::RemainderRawKernel,
phi::RemainderKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL(floor_divide_raw,
CPU,
ALL_LAYOUT,
phi::FloorDivideRawKernel,
int,
int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow_raw,
PD_REGISTER_KERNEL(
floor_divide, CPU, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow,
CPU,
ALL_LAYOUT,
phi::ElementwisePowRawKernel,
phi::ElementwisePowKernel,
float,
double,
int,
......
......@@ -23,51 +23,6 @@
namespace phi {
template <typename T, typename Context>
void MaximumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MaximumRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void MinimumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MinimumRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void RemainderKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
RemainderRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void FloorDivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
FloorDivideRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
ElementwisePowRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void DivideKernel(const Context& dev_ctx,
const DenseTensor& x,
......@@ -105,44 +60,6 @@ void SubtractKernel(const Context& dev_ctx,
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(maximum,
CPU,
ALL_LAYOUT,
phi::MaximumKernel,
float,
double,
int,
int64_t,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(minimum,
CPU,
ALL_LAYOUT,
phi::MinimumKernel,
float,
double,
int,
int64_t,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(remainder,
CPU,
ALL_LAYOUT,
phi::RemainderKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_KERNEL(
floor_divide, CPU, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow,
CPU,
ALL_LAYOUT,
phi::ElementwisePowKernel,
float,
double,
int,
int64_t,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(subtract,
CPU,
ALL_LAYOUT,
......@@ -192,52 +109,6 @@ PD_REGISTER_KERNEL(divide,
complex64,
complex128) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(maximum,
KPS,
ALL_LAYOUT,
phi::MaximumKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(minimum,
KPS,
ALL_LAYOUT,
phi::MinimumKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(remainder,
GPU,
ALL_LAYOUT,
phi::RemainderKernel,
float,
double,
int,
int64_t,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(
floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow,
KPS,
ALL_LAYOUT,
phi::ElementwisePowKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
#endif
#if defined(PADDLE_WITH_XPU_KP) && defined(PADDLE_WITH_XPU)
PD_REGISTER_KERNEL(subtract, KPS, ALL_LAYOUT, phi::SubtractKernel, float) {}
PD_REGISTER_KERNEL(add, KPS, ALL_LAYOUT, phi::AddKernel, float) {}
......@@ -329,29 +200,3 @@ PD_REGISTER_KERNEL(subtract,
phi::dtype::float16,
int64_t) {}
#endif
#if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP)
PD_REGISTER_KERNEL(floor_divide,
XPU,
ALL_LAYOUT,
phi::FloorDivideKernel,
float,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(
maximum, XPU, ALL_LAYOUT, phi::MaximumKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
minimum, XPU, ALL_LAYOUT, phi::MinimumKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(remainder,
XPU,
ALL_LAYOUT,
phi::RemainderKernel,
float,
phi::dtype::float16,
int32_t,
int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow,
XPU,
ALL_LAYOUT,
phi::ElementwisePowKernel,
float,
phi::dtype::float16) {}
#endif
......@@ -31,65 +31,30 @@ void FMinKernel(const Context& dev_ctx,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void MaximumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void MaximumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void MinimumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void MinimumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void RemainderRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void RemainderKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void FloorDivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void FloorDivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void ElementwisePowRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& x,
......
......@@ -19,11 +19,10 @@
#endif
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
#include "paddle/phi/kernels/legacy/elementwise_kernel.h"
namespace phi {
// Create the definition of Maximum
DEFINE_CUDA_ELEMENTWISE_OP(Maximum)
template <typename T, typename Context>
void MaximumKernel(const Context& dev_ctx,
const DenseTensor& x,
......@@ -33,8 +32,6 @@ void MaximumKernel(const Context& dev_ctx,
MaximumRawKernel<T>(dev_ctx, x, y, axis, out);
}
// Create the definition of Minimum
DEFINE_CUDA_ELEMENTWISE_OP(Minimum)
template <typename T, typename Context>
void MinimumKernel(const Context& dev_ctx,
const DenseTensor& x,
......@@ -43,10 +40,16 @@ void MinimumKernel(const Context& dev_ctx,
int axis = -1;
MinimumRawKernel<T>(dev_ctx, x, y, axis, out);
}
// Create the definition of Remainder
DEFINE_CUDA_ELEMENTWISE_OP(Remainder)
// Create the definition of FloorDivide
DEFINE_CUDA_ELEMENTWISE_OP(FloorDivide)
template <typename T, typename Context>
void RemainderKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
RemainderRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void FloorDivideKernel(const Context& dev_ctx,
const DenseTensor& x,
......@@ -73,8 +76,6 @@ void HeavisideKernel(const Context& dev_ctx,
dev_ctx, inputs, &outputs, -1, funcs::ElementwiseHeavisideFunctor<T>());
}
// Create the definition of Pow
DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow)
template <typename T, typename Context>
void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& x,
......@@ -86,101 +87,93 @@ void ElementwisePowKernel(const Context& dev_ctx,
} // namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL(maximum, KPS, ALL_LAYOUT, phi::MaximumKernel, float) {}
PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, phi::MaximumRawKernel, float) {
}
PD_REGISTER_KERNEL(minimum, KPS, ALL_LAYOUT, phi::MinimumKernel, float) {}
PD_REGISTER_KERNEL(minimum_raw, KPS, ALL_LAYOUT, phi::MinimumRawKernel, float) {
}
PD_REGISTER_KERNEL(floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int) {
}
PD_REGISTER_KERNEL(
floor_divide_raw, KPS, ALL_LAYOUT, phi::FloorDivideRawKernel, int) {}
PD_REGISTER_KERNEL(
elementwise_pow, KPS, ALL_LAYOUT, phi::ElementwisePowKernel, float) {}
PD_REGISTER_KERNEL(
elementwise_pow_raw, KPS, ALL_LAYOUT, phi::ElementwisePowRawKernel, float) {
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#else
using float16 = phi::dtype::float16;
using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(fmax,
PD_REGISTER_KERNEL(maximum,
KPS,
ALL_LAYOUT,
phi::FMaxKernel,
phi::MaximumKernel,
float,
double,
int,
float16,
int64_t) {}
PD_REGISTER_KERNEL(fmin,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(minimum,
KPS,
ALL_LAYOUT,
phi::FMinKernel,
phi::MinimumKernel,
float,
double,
int,
float16,
int64_t) {}
PD_REGISTER_KERNEL(maximum_raw,
KPS,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(remainder,
GPU,
ALL_LAYOUT,
phi::MaximumRawKernel,
phi::RemainderKernel,
float,
double,
int,
int64_t,
float16,
bfloat16) {}
PD_REGISTER_KERNEL(minimum_raw,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(
floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int, int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow,
KPS,
ALL_LAYOUT,
phi::MinimumRawKernel,
phi::ElementwisePowKernel,
float,
double,
int,
int64_t,
float16,
bfloat16) {}
PD_REGISTER_KERNEL(remainder_raw,
phi::dtype::float16,
phi::dtype::bfloat16) {}
#endif
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL(maximum, KPS, ALL_LAYOUT, phi::MaximumKernel, float) {}
PD_REGISTER_KERNEL(minimum, KPS, ALL_LAYOUT, phi::MinimumKernel, float) {}
PD_REGISTER_KERNEL(floor_divide, KPS, ALL_LAYOUT, phi::FloorDivideKernel, int) {
}
PD_REGISTER_KERNEL(
elementwise_pow, KPS, ALL_LAYOUT, phi::ElementwisePowKernel, float) {}
#else
using float16 = phi::dtype::float16;
using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(fmax,
KPS,
ALL_LAYOUT,
phi::RemainderRawKernel,
phi::FMaxKernel,
float,
double,
int,
float16,
int64_t) {}
PD_REGISTER_KERNEL(floor_divide_raw,
KPS,
ALL_LAYOUT,
phi::FloorDivideRawKernel,
int,
int64_t) {}
PD_REGISTER_KERNEL(heaviside,
PD_REGISTER_KERNEL(fmin,
KPS,
ALL_LAYOUT,
phi::HeavisideKernel,
phi::FMinKernel,
float,
double,
int,
float16,
int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow_raw,
PD_REGISTER_KERNEL(heaviside,
KPS,
ALL_LAYOUT,
phi::ElementwisePowRawKernel,
phi::HeavisideKernel,
float,
double,
int,
float16,
bfloat16,
int64_t) {}
#endif
......@@ -143,4 +143,5 @@ PD_REGISTER_KERNEL(elementwise_pow_raw,
float,
double,
int,
int64_t) {}
int64_t,
phi::dtype::bfloat16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/binary.h"
namespace phi {
template <typename T, typename Context>
void MaximumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void MinimumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void RemainderRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void FloorDivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void ElementwisePowRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
} // namespace phi
......@@ -172,5 +172,6 @@ PD_REGISTER_KERNEL(elementwise_pow_raw,
double,
int,
float16,
int64_t) {}
int64_t,
bfloat16) {}
#endif
......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/phi/kernels/elementwise_kernel.h"
#include "paddle/phi/kernels/legacy/elementwise_kernel.h"
#include "paddle/phi/kernels/xpu/elementwise.h"
#include "paddle/phi/backends/xpu/xpu_context.h"
......@@ -21,68 +22,37 @@
namespace phi {
template <typename T, typename Context>
void FloorDivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto f = [](xpu::Context* ctx,
const XPUType* x,
const XPUType* y,
XPUType* z,
const std::vector<int>& xshape,
const std::vector<int>& yshape) {
return xpu::broadcast_floordiv<XPUType>(ctx, x, y, z, xshape, yshape);
};
XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
void FloorDivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
FloorDivideRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void MaximumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto f = [](xpu::Context* ctx,
const XPUType* x,
const XPUType* y,
XPUType* z,
const std::vector<int>& xshape,
const std::vector<int>& yshape) {
return xpu::broadcast_max<XPUType>(ctx, x, y, z, xshape, yshape);
};
XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
void MaximumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MaximumRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void MinimumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto f = [](xpu::Context* ctx,
const XPUType* x,
const XPUType* y,
XPUType* z,
const std::vector<int>& xshape,
const std::vector<int>& yshape) {
return xpu::broadcast_min<XPUType>(ctx, x, y, z, xshape, yshape);
};
XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
void MinimumKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MinimumRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void RemainderRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
void RemainderKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto f = [](xpu::Context* ctx,
const XPUType* x,
......@@ -93,59 +63,41 @@ void RemainderRawKernel(const Context& dev_ctx,
return xpu::broadcast_mod<XPUType>(ctx, x, y, z, xshape, yshape);
};
XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
XPUElementwise<T, XPUType>(dev_ctx, x, y, -1, out, f);
}
template <typename T, typename Context>
void ElementwisePowRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
auto f = [](xpu::Context* ctx,
const XPUType* x,
const XPUType* y,
XPUType* z,
const std::vector<int>& xshape,
const std::vector<int>& yshape) {
return xpu::broadcast_pow<XPUType>(ctx, x, y, z, xshape, yshape);
};
XPUElementwise<T, XPUType>(dev_ctx, x, y, axis, out, f);
void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
ElementwisePowRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
PD_REGISTER_KERNEL(floor_divide_raw,
XPU,
ALL_LAYOUT,
phi::FloorDivideRawKernel,
float,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(maximum_raw,
XPU,
ALL_LAYOUT,
phi::MaximumRawKernel,
float,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(minimum_raw,
PD_REGISTER_KERNEL(floor_divide,
XPU,
ALL_LAYOUT,
phi::MinimumRawKernel,
phi::FloorDivideKernel,
float,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(remainder_raw,
PD_REGISTER_KERNEL(
maximum, XPU, ALL_LAYOUT, phi::MaximumKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
minimum, XPU, ALL_LAYOUT, phi::MinimumKernel, float, phi::dtype::float16) {}
PD_REGISTER_KERNEL(remainder,
XPU,
ALL_LAYOUT,
phi::RemainderRawKernel,
phi::RemainderKernel,
float,
phi::dtype::float16,
int32_t,
int64_t) {}
PD_REGISTER_KERNEL(elementwise_pow_raw,
PD_REGISTER_KERNEL(elementwise_pow,
XPU,
ALL_LAYOUT,
phi::ElementwisePowRawKernel,
phi::ElementwisePowKernel,
float,
phi::dtype::float16) {}