Unverified commit 2cb19d8f, authored by YuanRisheng, committed by GitHub


[Phi]Separate AddKernel/DivideKernel/SubtractKernel/MultiplyKernel from ElementwiseKernel(Part1) (#41806)

* separate add/div/sub/mul from elementwise

* delete code

* fix compile bugs

* deal with conflict

* fix bugs when compiling

* fix Windows unit test bug

* fix CI coverage bugs
Parent 59e8382d
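Each op now ships its own header under paddle/phi/kernels/ (elementwise_add_kernel.h, elementwise_subtract_kernel.h, elementwise_multiply_kernel.h, elementwise_divide_kernel.h), so callers include only the op they use. A minimal sketch of the separated entry point, assuming a Paddle tree where a CPU context and input tensors are already constructed (AddExample is a hypothetical helper, not part of this commit):

#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/kernels/elementwise_add_kernel.h"

// Computes x + y via the per-op API: phi::Add runs ElementwiseInferMeta to
// shape the output, then AddKernel forwards to AddRawKernel with axis = -1.
phi::DenseTensor AddExample(const phi::CPUContext& dev_ctx,
                            const phi::DenseTensor& x,
                            const phi::DenseTensor& y) {
  return phi::Add<float>(dev_ctx, x, y);
}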
...
@@ -42,7 +42,7 @@ using namespace egr_utils_api;  // NOLINT
PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_grad, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sum, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT);
...
...
@@ -41,7 +41,7 @@
PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_grad, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sum, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT);
...
...
@@ -36,7 +36,7 @@ PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
#endif
namespace egr {
...
...
@@ -69,14 +69,17 @@ PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(concat, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(concat_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add_raw, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(multiply, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(multiply_grad, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(divide, KPS, ALL_LAYOUT);
+PD_DECLARE_KERNEL(maximum, GPU, ALL_LAYOUT);
#ifdef PADDLE_WITH_XPU_KP
-PD_DECLARE_KERNEL(add_raw, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT);
#else
-PD_DECLARE_KERNEL(add_raw, KPS, ALL_LAYOUT);
PD_DECLARE_KERNEL(max_raw, KPS, ALL_LAYOUT);
#endif
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(mean_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(sigmoid, GPU, ALL_LAYOUT);
...
...
@@ -38,6 +38,7 @@ PD_DECLARE_KERNEL(matmul_with_flatten, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_with_flatten_grad, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_with_flatten, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(matmul_with_flatten_grad, GPU, ALL_LAYOUT);
...
...
@@ -30,7 +30,7 @@ USE_OP_ITSELF(elementwise_add);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
#ifdef PADDLE_WITH_CUDA
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
#endif
namespace paddle::operators {
...
...
@@ -36,7 +36,7 @@ DECLARE_bool(enable_pe_launch_cinn);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
#ifdef PADDLE_WITH_CUDA
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
#endif
namespace paddle::operators {
...
...
@@ -22,7 +22,7 @@
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/complex_kernel.h"
-#include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/common_shape.h"
#include "paddle/phi/kernels/funcs/diag_functor.h"
...
...
@@ -21,7 +21,9 @@
#include "paddle/fluid/operators/transpose_op.h"
#include "paddle/fluid/platform/for_range.h"
#include "paddle/phi/kernels/complex_kernel.h"
-#include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_divide_kernel.h"
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
+#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
#include "paddle/phi/kernels/funcs/complex_functors.h"
#include "paddle/phi/kernels/funcs/diag_functor.h"
#include "paddle/phi/kernels/funcs/lapack/lapack_function.h"
...
...
@@ -26,8 +26,8 @@ limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
// only can include the headers in paddle/phi/include dirs
-#include "paddle/phi/kernels/elementwise_grad_kernel.h"
-#include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
+#include "paddle/phi/kernels/elementwise_add_kernel.h"
#endif
namespace paddle {
...
...
@@ -26,6 +26,9 @@
USE_OP_ITSELF(elementwise_add);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
+#endif
namespace paddle {
namespace operators {
...
...
@@ -33,6 +33,7 @@ USE_OP_ITSELF(elementwise_add);
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
#endif
// get paddle matmul op results as baseline
...
...
@@ -18,7 +18,8 @@ limitations under the License. */
#include "paddle/fluid/framework/phi_utils.h"
#include "paddle/fluid/operators/set_value_op.h"
#include "paddle/fluid/operators/svd_helper.h"
-#include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_add_kernel.h"
+#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
#include "paddle/phi/kernels/funcs/lapack/lapack_function.h"
#include "paddle/phi/kernels/funcs/tril_triu_compute.h"
#include "paddle/phi/kernels/triangular_solve_kernel.h"
...
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/elementwise_grad.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T>
void AddGradFunc(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
DenseTensor* dx,
DenseTensor* dy,
int axis = -1) {
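// Fast path: when both gradients are requested and share the same dims,
// reuse the fused ElementwiseAddGrad; otherwise fall back to the
// broadcast-aware explicit computation with identity gradient functors.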
if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
ElementwiseAddGrad<T>(dev_ctx, x, y, out, dout, dx, dy);
} else {
ElemwiseExplicitGradCompute<T, IdentityGrad<T>, IdentityGrad<T>>(
dev_ctx,
x,
y,
out,
dout,
axis,
dx,
dy,
IdentityGrad<T>(),
IdentityGrad<T>());
}
}
template <typename T, typename Context>
void AddGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
phi::AddGradImpl<T>(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc<T>);
}
template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* ddout) {
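// Computes ddout = ddx + ddy; the shared AddDoubleGradImpl is expected to
// substitute a zeros tensor for a missing optional second-order input
// (behavior of the helper, not shown in this diff).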
phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
template <typename T, typename Context>
void AddTripleGradKernel(const Context& dev_ctx,
const DenseTensor& ddx,
const DenseTensor& ddy,
const DenseTensor& d_ddout,
int axis,
DenseTensor* d_ddx,
DenseTensor* d_ddy) {
phi::AddGradImpl<T>(
dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc<T>);
}
} // namespace phi
PD_REGISTER_KERNEL(add_grad,
CPU,
ALL_LAYOUT,
phi::AddGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_double_grad,
CPU,
ALL_LAYOUT,
phi::AddDoubleGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_triple_grad,
CPU,
ALL_LAYOUT,
phi::AddTripleGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/api/ext/dispatch.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
// Create the definition of Add
DEFINE_CPU_ELEMENTWISE_OP(Add)
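// The macro expands to AddRawKernel<T, Context>, which takes an explicit
// axis and picks between the same-dims fast path and the broadcast
// ElementwiseCompute path (see DEFINE_CPU_ELEMENTWISE_OP in cpu/elementwise.h).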
template <typename T, typename Context>
void AddKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
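// The non-raw kernel pins axis to -1 (broadcast aligned from trailing
// dimensions), matching the default attribute of the old fused op.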
int axis = -1;
AddRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
// using bfloat16 = ::phi::dtype::bfloat16;
PD_REGISTER_KERNEL(add_raw,
CPU,
ALL_LAYOUT,
phi::AddRawKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128) {}
PD_REGISTER_KERNEL(add,
CPU,
ALL_LAYOUT,
phi::AddKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128) {}
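Where a caller still needs an explicit broadcast axis, the *_raw variant remains the entry point. A hedged sketch (hypothetical tensors; assume x has shape [2, 3, 4] and y has shape [3, 4]):

// axis = 1 aligns y with dimensions 1..2 of x, the same semantics as the
// old elementwise_add axis attribute; axis = -1 would align from the tail.
phi::AddRawKernel<float>(dev_ctx, x, y, /*axis=*/1, &out);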
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/cpu/elementwise_grad.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void DivideGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
phi::funcs::ElemwiseGradCompute<Context, T, DivGradDX<T>, DivGradDY<T>>(
dev_ctx, x, y, out, dout, axis, dx, dy, DivGradDX<T>(), DivGradDY<T>());
}
} // namespace phi
PD_REGISTER_KERNEL(divide_grad,
CPU,
ALL_LAYOUT,
phi::DivideGradKernel,
float,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(divide_double_grad,
CPU,
ALL_LAYOUT,
phi::DivideDoubleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/api/ext/dispatch.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void DivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
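// Only same-shape floating-point inputs take the SameDims fast path;
// everything else (including integer division) uses the broadcast-aware
// ElementwiseCompute, with the inverse functor when y has the higher rank.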
if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
SameDimsElementwiseCompute<SameDimsDivideFunctor<CPUContext, T>>()(
dev_ctx, x, y, out);
} else {
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
funcs::ElementwiseCompute<funcs::DivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::DivideFunctor<T>(), out);
} else {
funcs::ElementwiseCompute<funcs::InverseDivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::InverseDivideFunctor<T>(), out);
}
}
}
template <typename T, typename Context>
void DivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
DivideRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
// using bfloat16 = ::phi::dtype::bfloat16;
PD_REGISTER_KERNEL(divide_raw,
CPU,
ALL_LAYOUT,
phi::DivideRawKernel,
float,
double,
int,
int64_t,
complex64,
complex128) {}
PD_REGISTER_KERNEL(divide,
CPU,
ALL_LAYOUT,
phi::DivideKernel,
float,
double,
int,
int64_t,
complex64,
complex128) {}
...
@@ -23,118 +23,6 @@
namespace phi {
template <typename T>
void AddGradFunc(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
DenseTensor* dx,
DenseTensor* dy,
int axis = -1) {
if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
ElementwiseAddGrad<T>(dev_ctx, x, y, out, dout, dx, dy);
} else {
ElemwiseExplicitGradCompute<T, IdentityGrad<T>, IdentityGrad<T>>(
dev_ctx,
x,
y,
out,
dout,
axis,
dx,
dy,
IdentityGrad<T>(),
IdentityGrad<T>());
}
}
template <typename T, typename Context>
void AddGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
phi::AddGradImpl<T>(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc<T>);
}
template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* ddout) {
phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
template <typename T, typename Context>
void AddTripleGradKernel(const Context& dev_ctx,
const DenseTensor& ddx,
const DenseTensor& ddy,
const DenseTensor& d_ddout,
int axis,
DenseTensor* d_ddx,
DenseTensor* d_ddy) {
phi::AddGradImpl<T>(
dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc<T>);
}
template <typename T, typename Context>
void SubtractGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
// skip out
auto* out = &dout;
ElementwiseSubGrad<T>(dev_ctx, x, y, *out, dout, dx, dy, axis);
}
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout) {
phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
template <typename T, typename Context>
void DivideGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
phi::funcs::ElemwiseGradCompute<Context, T, DivGradDX<T>, DivGradDY<T>>(
dev_ctx, x, y, out, dout, axis, dx, dy, DivGradDX<T>(), DivGradDY<T>());
}
template <typename T, typename Context>
void MultiplyGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
auto* out = &dout; // out is not necessary
phi::funcs::ElemwiseGradCompute<Context, T, MulGradDX<T>, MulGradDY<T>>(
dev_ctx, x, y, *out, dout, axis, dx, dy, MulGradDX<T>(), MulGradDY<T>());
}
template <typename T, typename Context>
void MaximumGradKernel(const Context& dev_ctx,
const DenseTensor& x,
...
@@ -163,129 +51,6 @@ void MinimumGradKernel(const Context& dev_ctx,
}  // namespace phi
PD_REGISTER_KERNEL(add_grad,
CPU,
ALL_LAYOUT,
phi::AddGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_double_grad,
CPU,
ALL_LAYOUT,
phi::AddDoubleGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_triple_grad,
CPU,
ALL_LAYOUT,
phi::AddTripleGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(subtract_grad,
CPU,
ALL_LAYOUT,
phi::SubtractGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(subtract_double_grad,
CPU,
ALL_LAYOUT,
phi::SubtractDoubleGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(divide_grad,
CPU,
ALL_LAYOUT,
phi::DivideGradKernel,
float,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(divide_double_grad,
CPU,
ALL_LAYOUT,
phi::DivideDoubleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_grad,
CPU,
ALL_LAYOUT,
phi::MultiplyGradKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_double_grad,
CPU,
ALL_LAYOUT,
phi::MultiplyDoubleGradKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_triple_grad,
CPU,
ALL_LAYOUT,
phi::MultiplyTripleGradKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(fmax_grad,
CPU,
ALL_LAYOUT,
...
@@ -21,54 +21,6 @@
namespace phi {
#define DEFINE_CPU_ELEMENTWISE_OP(name) \
template <typename T, typename Context> \
void name##RawKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
dev_ctx.template Alloc<T>(out); \
if (x.dims() == y.dims()) { \
SameDimsElementwiseCompute<SameDims##name##Functor<CPUContext, T>>()( \
dev_ctx, x, y, out); \
} else { \
auto x_dims = x.dims(); \
auto y_dims = y.dims(); \
if (x_dims.size() >= y_dims.size()) { \
funcs::ElementwiseCompute<funcs::name##Functor<T>, T>( \
dev_ctx, x, y, axis, funcs::name##Functor<T>(), out); \
} else { \
funcs::ElementwiseCompute<funcs::Inverse##name##Functor<T>, T>( \
dev_ctx, x, y, axis, funcs::Inverse##name##Functor<T>(), out); \
} \
} \
}
template <typename T, typename Context>
void DivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
dev_ctx.template Alloc<T>(out);
if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
SameDimsElementwiseCompute<SameDimsDivideFunctor<CPUContext, T>>()(
dev_ctx, x, y, out);
} else {
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
funcs::ElementwiseCompute<funcs::DivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::DivideFunctor<T>(), out);
} else {
funcs::ElementwiseCompute<funcs::InverseDivideFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::InverseDivideFunctor<T>(), out);
}
}
}
template <typename T, typename Context>
void MaximumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
...
@@ -142,14 +94,6 @@ void ElementwisePowRawKernel(const Context& dev_ctx,
funcs::ElementwiseCompute<funcs::ElementwisePowFunctor<T>, T>(
dev_ctx, x, y, axis, funcs::ElementwisePowFunctor<T>(), out);
}
// Create the definition of Add
DEFINE_CPU_ELEMENTWISE_OP(Add)
// Create the definition of Subtract
DEFINE_CPU_ELEMENTWISE_OP(Subtract)
// Create the definition of Multiply
DEFINE_CPU_ELEMENTWISE_OP(Multiply)
}  // namespace phi
...
@@ -165,51 +109,6 @@ PD_REGISTER_KERNEL(
PD_REGISTER_KERNEL(
fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(add_raw,
CPU,
ALL_LAYOUT,
phi::AddRawKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128) {}
PD_REGISTER_KERNEL(subtract_raw,
CPU,
ALL_LAYOUT,
phi::SubtractRawKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(divide_raw,
CPU,
ALL_LAYOUT,
phi::DivideRawKernel,
float,
double,
int,
int64_t,
complex64,
complex128) {}
PD_REGISTER_KERNEL(multiply_raw,
CPU,
ALL_LAYOUT,
phi::MultiplyRawKernel,
float,
double,
int,
int64_t,
bool,
complex64,
complex128,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(maximum_raw,
CPU,
ALL_LAYOUT,
...
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/cpu/elementwise_grad.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void MultiplyGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
auto* out = &dout; // out is not necessary
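// dx = dout * y (MulGradDX) and dy = dout * x (MulGradDY); the forward
// output slot is filled with dout only to satisfy ElemwiseGradCompute.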
phi::funcs::ElemwiseGradCompute<Context, T, MulGradDX<T>, MulGradDY<T>>(
dev_ctx, x, y, *out, dout, axis, dx, dy, MulGradDX<T>(), MulGradDY<T>());
}
} // namespace phi
PD_REGISTER_KERNEL(multiply_grad,
CPU,
ALL_LAYOUT,
phi::MultiplyGradKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_double_grad,
CPU,
ALL_LAYOUT,
phi::MultiplyDoubleGradKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_triple_grad,
CPU,
ALL_LAYOUT,
phi::MultiplyTripleGradKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/api/ext/dispatch.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
// Create the definition of Multiply
DEFINE_CPU_ELEMENTWISE_OP(Multiply)
template <typename T, typename Context>
void MultiplyKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MultiplyRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
// using bfloat16 = ::phi::dtype::bfloat16;
PD_REGISTER_KERNEL(multiply_raw,
CPU,
ALL_LAYOUT,
phi::MultiplyRawKernel,
float,
double,
int,
int64_t,
bool,
complex64,
complex128,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(multiply,
CPU,
ALL_LAYOUT,
phi::MultiplyKernel,
float,
double,
int,
int64_t,
bool,
complex64,
complex128,
phi::dtype::bfloat16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_subtract_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/cpu/elementwise_grad.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void SubtractGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
// Subtract's gradients do not depend on the forward output, so dout stands in for it.
auto* out = &dout;
ElementwiseSubGrad<T>(dev_ctx, x, y, *out, dout, dx, dy, axis);
}
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout) {
phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
} // namespace phi
PD_REGISTER_KERNEL(subtract_grad,
CPU,
ALL_LAYOUT,
phi::SubtractGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(subtract_double_grad,
CPU,
ALL_LAYOUT,
phi::SubtractDoubleGradKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/cpu/elementwise.h"
#include "paddle/phi/api/ext/dispatch.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
// Create the definition of Subtract
DEFINE_CPU_ELEMENTWISE_OP(Subtract)
template <typename T, typename Context>
void SubtractKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
SubtractRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
// using bfloat16 = ::phi::dtype::bfloat16;
PD_REGISTER_KERNEL(subtract_raw,
CPU,
ALL_LAYOUT,
phi::SubtractRawKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(subtract,
CPU,
ALL_LAYOUT,
phi::SubtractKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128,
phi::dtype::bfloat16) {}
...
@@ -17,7 +17,7 @@
#include <Eigen/Dense>
#include <Eigen/SVD>
#include "paddle/phi/core/kernel_registry.h"
-#include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/compare_functors.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
...
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/utils/optional.h"
namespace phi {
template <typename T, typename Context>
void AddGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* ddout);
template <typename T, typename Context>
void AddTripleGradKernel(const Context& dev_ctx,
const DenseTensor& ddx,
const DenseTensor& ddy,
const DenseTensor& d_ddout,
int axis,
DenseTensor* d_ddx,
DenseTensor* d_ddy);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/binary.h"
namespace phi {
template <typename T, typename Context>
void AddRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void AddKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
DenseTensor Add(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/utils/optional.h"
namespace phi {
template <typename T, typename Context>
void DivideGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void DivideDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dx,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* dy,
DenseTensor* dout,
DenseTensor* ddout);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/binary.h"
namespace phi {
template <typename T, typename Context>
void DivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void DivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
DenseTensor Divide(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
} // namespace phi
...
@@ -19,111 +19,6 @@ limitations under the License. */
namespace phi {
template <typename T, typename Context>
void AddGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* ddout);
template <typename T, typename Context>
void AddTripleGradKernel(const Context& dev_ctx,
const DenseTensor& ddx,
const DenseTensor& ddy,
const DenseTensor& d_ddout,
int axis,
DenseTensor* d_ddx,
DenseTensor* d_ddy);
template <typename T, typename Context>
void SubtractGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout);
template <typename T, typename Context>
void DivideGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void DivideDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dx,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* dy,
DenseTensor* dout,
DenseTensor* ddout);
template <typename T, typename Context>
void MultiplyGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void MultiplyDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* dx,
DenseTensor* dy,
DenseTensor* ddout);
template <typename T, typename Context>
void MultiplyTripleGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& d_dx,
const DenseTensor& d_dy,
paddle::optional<const DenseTensor&> d_ddout,
int axis,
DenseTensor* d_x,
DenseTensor* d_y,
DenseTensor* d_dout,
DenseTensor* d_ddx,
DenseTensor* d_ddy);
template <typename T, typename Context>
void ElementwiseFMaxGradKernel(const Context& dev_ctx,
const DenseTensor& x,
...
@@ -19,42 +19,6 @@
namespace phi {
template <typename T, typename Context>
void AddKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
AddRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void SubtractKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
SubtractRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void DivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
DivideRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void MultiplyKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MultiplyRawKernel<T>(dev_ctx, x, y, axis, out);
}
template <typename T, typename Context>
void MaximumKernel(const Context& dev_ctx,
const DenseTensor& x,
...
@@ -105,51 +69,6 @@ void ElementwisePowKernel(const Context& dev_ctx,
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(add,
CPU,
ALL_LAYOUT,
phi::AddKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128) {}
PD_REGISTER_KERNEL(subtract,
CPU,
ALL_LAYOUT,
phi::SubtractKernel,
float,
double,
int16_t,
int,
int64_t,
complex64,
complex128,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(divide,
CPU,
ALL_LAYOUT,
phi::DivideKernel,
float,
double,
int,
int64_t,
complex64,
complex128) {}
PD_REGISTER_KERNEL(multiply,
CPU,
ALL_LAYOUT,
phi::MultiplyKernel,
float,
double,
int,
int64_t,
bool,
complex64,
complex128,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(maximum,
CPU,
ALL_LAYOUT,
...
@@ -183,57 +102,6 @@ PD_REGISTER_KERNEL(elementwise_pow,
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(add,
GPU,
ALL_LAYOUT,
phi::AddKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(subtract,
GPU,
ALL_LAYOUT,
phi::SubtractKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::float16,
complex64,
complex128,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(divide,
GPU,
ALL_LAYOUT,
phi::DivideKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(multiply,
GPU,
ALL_LAYOUT,
phi::MultiplyKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::float16,
phi::dtype::bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(maximum,
GPU,
ALL_LAYOUT,
...
@@ -33,58 +33,6 @@ void FMinKernel(const Context& dev_ctx,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void AddRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void AddKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void SubtractRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void SubtractKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void DivideRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void DivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void MultiplyRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void MultiplyKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
void MaximumRawKernel(const Context& dev_ctx,
const DenseTensor& x,
...
@@ -150,50 +98,6 @@ void ElementwisePowKernel(const Context& dev_ctx,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
DenseTensor Add(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
template <typename T, typename Context>
DenseTensor Subtract(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
template <typename T, typename Context>
DenseTensor Divide(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
template <typename T, typename Context>
DenseTensor Multiply(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
template <typename T, typename Context>
DenseTensor Maximum(const Context& dev_ctx,
const DenseTensor& x,
...
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/utils/optional.h"
namespace phi {
template <typename T, typename Context>
void MultiplyGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void MultiplyDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* dx,
DenseTensor* dy,
DenseTensor* ddout);
template <typename T, typename Context>
void MultiplyTripleGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& d_dx,
const DenseTensor& d_dy,
paddle::optional<const DenseTensor&> d_ddout,
int axis,
DenseTensor* d_x,
DenseTensor* d_y,
DenseTensor* d_dout,
DenseTensor* d_ddx,
DenseTensor* d_ddy);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/binary.h"
namespace phi {
template <typename T, typename Context>
void MultiplyRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void MultiplyKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
DenseTensor Multiply(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/utils/optional.h"
namespace phi {
template <typename T, typename Context>
void SubtractGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy);
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/binary.h"
namespace phi {
template <typename T, typename Context>
void SubtractRawKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
template <typename T, typename Context>
void SubtractKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out);
template <typename T, typename Context>
DenseTensor Subtract(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
DenseTensor dense_out;
MetaTensor meta_out(&dense_out);
ElementwiseInferMeta(x, y, &meta_out);
SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
}
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_add_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/gpu/elementwise_grad.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T>
void AddGradFunc(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
DenseTensor* dx,
DenseTensor* dy,
int axis = -1) {
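// Mirrors the CPU dispatch: same-dims gradients use the fused GPU kernel,
// anything else falls back to the broadcast-aware default implementation.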
if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
ElementwiseAddGrad<T>(dev_ctx, x, y, out, dout, dx, dy);
} else {
DefaultElementwiseAddGrad<T>(dev_ctx, x, y, out, dout, dx, dy, axis);
}
}
template <typename T, typename Context>
void AddGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
phi::AddGradImpl<T>(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc<T>);
}
template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* ddout) {
phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
template <typename T, typename Context>
void AddTripleGradKernel(const Context& dev_ctx,
const DenseTensor& ddx,
const DenseTensor& ddy,
const DenseTensor& d_ddout,
int axis,
DenseTensor* d_ddx,
DenseTensor* d_ddy) {
phi::AddGradImpl<T>(
dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc<T>);
}
} // namespace phi
PD_REGISTER_KERNEL(add_grad,
GPU,
ALL_LAYOUT,
phi::AddGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_double_grad,
GPU,
ALL_LAYOUT,
phi::AddDoubleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_triple_grad,
GPU,
ALL_LAYOUT,
phi::AddTripleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
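These registrations pair with PD_DECLARE_KERNEL in consuming translation units; a hedged sketch of such a consumer (the file and context here are hypothetical, but the declaration pattern mirrors the test changes later in this diff):
#include "paddle/phi/core/kernel_registry.h"
// Hypothetical consumer: declaring the kernels pulls the static
// registration symbols into the link.
PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_double_grad, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(add_triple_grad, GPU, ALL_LAYOUT);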
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/gpu/elementwise_grad.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void DivideGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
const auto place = dev_ctx.GetPlace();
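  // For out = x / y the gradients are dx = dout / y and
  // dy = -dout * out / y, so computing dy requires the forward
  // output (the ternary input set) while dx does not.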
if (dx != nullptr && dy != nullptr) {
std::vector<const DenseTensor*> ins = {&dout, &out, &y};
GetGradXAndYOut<ElementwiseType::kTernary, T>(
dev_ctx,
place,
axis,
ins,
dout,
dx,
dy,
funcs::DivGradXYFunctor<T, T>());
} else if (dx != nullptr && dy == nullptr) {
std::vector<const DenseTensor*> ins = {&dout, &y};
GetGradXOrYOut<ElementwiseType::kBinary, T>(
dev_ctx, place, axis, ins, dout, dx, funcs::DivGradXFunctor<T>());
} else if (dy != nullptr && dx == nullptr) {
std::vector<const DenseTensor*> ins = {&dout, &out, &y};
GetGradXOrYOut<ElementwiseType::kTernary, T>(
dev_ctx, place, axis, ins, dout, dy, funcs::DivGradYFunctor<T>());
}
}
} // namespace phi
PD_REGISTER_KERNEL(divide_grad,
GPU,
ALL_LAYOUT,
phi::DivideGradKernel,
float,
phi::dtype::float16,
phi::dtype::bfloat16,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(divide_double_grad,
GPU,
ALL_LAYOUT,
phi::DivideDoubleGradKernel,
float,
phi::dtype::float16,
phi::dtype::bfloat16,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
...@@ -26,128 +26,6 @@ ...@@ -26,128 +26,6 @@
namespace phi { namespace phi {
template <typename T>
void AddGradFunc(const GPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
DenseTensor* dx,
DenseTensor* dy,
int axis = -1) {
if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
ElementwiseAddGrad<T>(dev_ctx, x, y, out, dout, dx, dy);
} else {
DefaultElementwiseAddGrad<T>(dev_ctx, x, y, out, dout, dx, dy, axis);
}
}
template <typename T, typename Context>
void AddGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
phi::AddGradImpl<T>(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc<T>);
}
template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
int axis,
DenseTensor* ddout) {
phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
template <typename T, typename Context>
void AddTripleGradKernel(const Context& dev_ctx,
const DenseTensor& ddx,
const DenseTensor& ddy,
const DenseTensor& d_ddout,
int axis,
DenseTensor* d_ddx,
DenseTensor* d_ddy) {
phi::AddGradImpl<T>(
dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc<T>);
}
template <typename T, typename Context>
void SubtractGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
// skip out
auto* out = &dout;
if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
elementwise_sub_grad<T>(dev_ctx, x, y, *out, dout, dx, dy);
} else {
default_elementwise_sub_grad<T>(dev_ctx, x, y, *out, dout, dx, dy, axis);
}
}
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout) {
phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
template <typename T, typename Context>
void DivideGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& out,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
const auto place = dev_ctx.GetPlace();
if (dx != nullptr && dy != nullptr) {
std::vector<const DenseTensor*> ins = {&dout, &out, &y};
GetGradXAndYOut<ElementwiseType::kTernary, T>(
dev_ctx,
place,
axis,
ins,
dout,
dx,
dy,
funcs::DivGradXYFunctor<T, T>());
} else if (dx != nullptr && dy == nullptr) {
std::vector<const DenseTensor*> ins = {&dout, &y};
GetGradXOrYOut<ElementwiseType::kBinary, T>(
dev_ctx, place, axis, ins, dout, dx, funcs::DivGradXFunctor<T>());
} else if (dy != nullptr && dx == nullptr) {
std::vector<const DenseTensor*> ins = {&dout, &out, &y};
GetGradXOrYOut<ElementwiseType::kTernary, T>(
dev_ctx, place, axis, ins, dout, dy, funcs::DivGradYFunctor<T>());
}
}
template <typename T, typename Context>
void MultiplyGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
ElementwiseMulGrad<T>(dev_ctx, x, y, dout, dx, dy, axis);
}
template <typename T, typename Context> template <typename T, typename Context>
void MaximumGradKernel(const Context& dev_ctx, void MaximumGradKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
...@@ -211,138 +89,6 @@ void MinimumGradKernel(const Context& dev_ctx, ...@@ -211,138 +89,6 @@ void MinimumGradKernel(const Context& dev_ctx,
} }
} // namespace phi } // namespace phi
PD_REGISTER_KERNEL(add_grad,
GPU,
ALL_LAYOUT,
phi::AddGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_double_grad,
GPU,
ALL_LAYOUT,
phi::AddDoubleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(add_triple_grad,
GPU,
ALL_LAYOUT,
phi::AddTripleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(subtract_grad,
GPU,
ALL_LAYOUT,
phi::SubtractGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(subtract_double_grad,
GPU,
ALL_LAYOUT,
phi::SubtractDoubleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(divide_grad,
GPU,
ALL_LAYOUT,
phi::DivideGradKernel,
float,
phi::dtype::float16,
phi::dtype::bfloat16,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(divide_double_grad,
GPU,
ALL_LAYOUT,
phi::DivideDoubleGradKernel,
float,
phi::dtype::float16,
phi::dtype::bfloat16,
double,
int,
int64_t,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_grad,
GPU,
ALL_LAYOUT,
phi::MultiplyGradKernel,
float,
phi::dtype::float16,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_double_grad,
GPU,
ALL_LAYOUT,
phi::MultiplyDoubleGradKernel,
float,
phi::dtype::float16,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_triple_grad,
GPU,
ALL_LAYOUT,
phi::MultiplyTripleGradKernel,
float,
phi::dtype::float16,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(fmax_grad, PD_REGISTER_KERNEL(fmax_grad,
GPU, GPU,
ALL_LAYOUT, ALL_LAYOUT,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/gpu/elementwise_grad.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void MultiplyGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
funcs::ElementwiseGradPreProcess(dout, dx);
ElementwiseMulGrad<T>(dev_ctx, x, y, dout, dx, dy, axis);
}
} // namespace phi
PD_REGISTER_KERNEL(multiply_grad,
GPU,
ALL_LAYOUT,
phi::MultiplyGradKernel,
float,
phi::dtype::float16,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_double_grad,
GPU,
ALL_LAYOUT,
phi::MultiplyDoubleGradKernel,
float,
phi::dtype::float16,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(multiply_triple_grad,
GPU,
ALL_LAYOUT,
phi::MultiplyTripleGradKernel,
float,
phi::dtype::float16,
double,
int,
int64_t,
bool,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/elementwise_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/funcs/elementwise_functor.h"
#include "paddle/phi/kernels/gpu/elementwise_grad.h"
#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h"
namespace phi {
template <typename T, typename Context>
void SubtractGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
const DenseTensor& dout,
int axis,
DenseTensor* dx,
DenseTensor* dy) {
  // The subtract gradient never reads the forward output, so dout is
  // reused in its place below.
auto* out = &dout;
if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) {
elementwise_sub_grad<T>(dev_ctx, x, y, *out, dout, dx, dy);
} else {
default_elementwise_sub_grad<T>(dev_ctx, x, y, *out, dout, dx, dy, axis);
}
}
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout) {
phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
}
} // namespace phi
PD_REGISTER_KERNEL(subtract_grad,
GPU,
ALL_LAYOUT,
phi::SubtractGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
PD_REGISTER_KERNEL(subtract_double_grad,
GPU,
ALL_LAYOUT,
phi::SubtractDoubleGradKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
phi::dtype::complex<float>,
phi::dtype::complex<double>) {}
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
#include "paddle/phi/backends/dynload/cusolver.h" #include "paddle/phi/backends/dynload/cusolver.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/abs_kernel.h" #include "paddle/phi/kernels/abs_kernel.h"
#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/broadcast_function.h"
#include "paddle/phi/kernels/funcs/compare_functors.h" #include "paddle/phi/kernels/funcs/compare_functors.h"
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "paddle/phi/kernels/cholesky_solve_kernel.h" #include "paddle/phi/kernels/cholesky_solve_kernel.h"
#include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/complex_kernel.h"
#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/elementwise_add_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/expand_kernel.h" #include "paddle/phi/kernels/expand_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/blas/blas.h"
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include "paddle/phi/kernels/determinant_grad_kernel.h" #include "paddle/phi/kernels/determinant_grad_kernel.h"
#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/funcs/for_range.h"
......
...@@ -16,7 +16,9 @@ ...@@ -16,7 +16,9 @@
#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/complex_kernel.h"
#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/elementwise_divide_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
#include "paddle/phi/kernels/funcs/diag_functor.h" #include "paddle/phi/kernels/funcs/diag_functor.h"
#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/common.h"
#include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/math_function.h"
......
...@@ -22,6 +22,48 @@ ...@@ -22,6 +22,48 @@
#endif #endif
namespace phi { namespace phi {
#define DEFINE_CPU_ELEMENTWISE_OP(name) \
template <typename T, typename Context> \
void name##RawKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
dev_ctx.template Alloc<T>(out); \
if (x.dims() == y.dims()) { \
SameDimsElementwiseCompute<SameDims##name##Functor<CPUContext, T>>()( \
dev_ctx, x, y, out); \
} else { \
auto x_dims = x.dims(); \
auto y_dims = y.dims(); \
if (x_dims.size() >= y_dims.size()) { \
funcs::ElementwiseCompute<funcs::name##Functor<T>, T>( \
dev_ctx, x, y, axis, funcs::name##Functor<T>(), out); \
} else { \
funcs::ElementwiseCompute<funcs::Inverse##name##Functor<T>, T>( \
dev_ctx, x, y, axis, funcs::Inverse##name##Functor<T>(), out); \
} \
} \
}
#define DEFINE_CUDA_ELEMENTWISE_OP(name) \
template <typename T, typename Context> \
void name##RawKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
std::vector<const DenseTensor*> inputs; \
std::vector<DenseTensor*> outputs; \
inputs.emplace_back(&x); \
inputs.emplace_back(&y); \
outputs.emplace_back(out); \
dev_ctx.template Alloc<T>(out); \
funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>( \
dev_ctx, inputs, &outputs, axis, funcs::name##Functor<T>()); \
}
template <typename T, typename Context> template <typename T, typename Context>
void FMaxKernel(const Context& dev_ctx, void FMaxKernel(const Context& dev_ctx,
const DenseTensor& x, const DenseTensor& x,
......
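For readability, DEFINE_CUDA_ELEMENTWISE_OP(Add) expands (by token pasting, inside namespace phi) to roughly the following; this hand expansion is for illustration and is not written out in the source:
// Approximate hand expansion of DEFINE_CUDA_ELEMENTWISE_OP(Add).
template <typename T, typename Context>
void AddRawKernel(const Context& dev_ctx,
                  const DenseTensor& x,
                  const DenseTensor& y,
                  int axis,
                  DenseTensor* out) {
  std::vector<const DenseTensor*> inputs;
  std::vector<DenseTensor*> outputs;
  inputs.emplace_back(&x);
  inputs.emplace_back(&y);
  outputs.emplace_back(out);
  dev_ctx.template Alloc<T>(out);
  funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>(
      dev_ctx, inputs, &outputs, axis, funcs::AddFunctor<T>());
}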
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#ifndef PADDLE_WITH_XPU_KP
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#endif
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
DEFINE_CUDA_ELEMENTWISE_OP(Add)
template <typename T, typename Context>
void AddKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
AddRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL(add_raw, KPS, ALL_LAYOUT, phi::AddRawKernel, float) {}
#else
using float16 = phi::dtype::float16;
using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(add_raw,
KPS,
ALL_LAYOUT,
phi::AddRawKernel,
float,
double,
int16_t,
int,
int64_t,
float16,
bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(add,
KPS,
ALL_LAYOUT,
phi::AddKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
complex64,
complex128) {}
#endif
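A minimal sketch of calling the separated kernel directly (a hypothetical call site, assuming the split header declares AddKernel as defined above; the wrapper simply forwards to AddRawKernel with axis = -1, the default broadcast):
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/elementwise_add_kernel.h"
// Hypothetical call site for the separated add kernel.
void AddExample(const phi::GPUContext& dev_ctx,
                const phi::DenseTensor& x,
                const phi::DenseTensor& y,
                phi::DenseTensor* out) {
  // Equivalent to AddRawKernel<float>(dev_ctx, x, y, /*axis=*/-1, out).
  phi::AddKernel<float>(dev_ctx, x, y, out);
}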
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#ifndef PADDLE_WITH_XPU_KP
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#endif
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
// Create the definition of Divide
DEFINE_CUDA_ELEMENTWISE_OP(Divide)
template <typename T, typename Context>
void DivideKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
DivideRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL(divide_raw, KPS, ALL_LAYOUT, phi::DivideRawKernel, float) {}
#else
using float16 = phi::dtype::float16;
using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(divide_raw,
KPS,
ALL_LAYOUT,
phi::DivideRawKernel,
float,
double,
int,
int64_t,
float16,
bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(divide,
KPS,
ALL_LAYOUT,
phi::DivideKernel,
float,
double,
int,
int64_t,
phi::dtype::float16,
phi::dtype::bfloat16,
complex64,
complex128) {}
#endif
...@@ -22,34 +22,6 @@ ...@@ -22,34 +22,6 @@
namespace phi { namespace phi {
#define DEFINE_CUDA_ELEMENTWISE_OP(name) \
template <typename T, typename Context> \
void name##RawKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& y, \
int axis, \
DenseTensor* out) { \
std::vector<const DenseTensor*> inputs; \
std::vector<DenseTensor*> outputs; \
inputs.emplace_back(&x); \
inputs.emplace_back(&y); \
outputs.emplace_back(out); \
dev_ctx.template Alloc<T>(out); \
funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>( \
dev_ctx, inputs, &outputs, axis, funcs::name##Functor<T>()); \
}
/**
* Kernels
*/
// Create the definition of Add
DEFINE_CUDA_ELEMENTWISE_OP(Add)
// Create the definition of Subtract
DEFINE_CUDA_ELEMENTWISE_OP(Subtract)
// Create the definition of Multiply
DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
// Create the definition of Divide
DEFINE_CUDA_ELEMENTWISE_OP(Divide)
// Create the definition of Maximum // Create the definition of Maximum
DEFINE_CUDA_ELEMENTWISE_OP(Maximum) DEFINE_CUDA_ELEMENTWISE_OP(Maximum)
// Create the definition of Minimum // Create the definition of Minimum
...@@ -64,12 +36,6 @@ DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow) ...@@ -64,12 +36,6 @@ DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow)
} // namespace phi } // namespace phi
#ifdef PADDLE_WITH_XPU_KP #ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL(add_raw, KPS, ALL_LAYOUT, phi::AddRawKernel, float) {}
PD_REGISTER_KERNEL(
subtract_raw, KPS, ALL_LAYOUT, phi::SubtractRawKernel, float) {}
PD_REGISTER_KERNEL(divide_raw, KPS, ALL_LAYOUT, phi::DivideRawKernel, float) {}
PD_REGISTER_KERNEL(
multiply_raw, KPS, ALL_LAYOUT, phi::MultiplyRawKernel, float) {}
PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, phi::MaximumRawKernel, float) { PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, phi::MaximumRawKernel, float) {
} }
PD_REGISTER_KERNEL(minimum_raw, KPS, ALL_LAYOUT, phi::MinimumRawKernel, float) { PD_REGISTER_KERNEL(minimum_raw, KPS, ALL_LAYOUT, phi::MinimumRawKernel, float) {
...@@ -89,57 +55,6 @@ PD_REGISTER_KERNEL( ...@@ -89,57 +55,6 @@ PD_REGISTER_KERNEL(
PD_REGISTER_KERNEL( PD_REGISTER_KERNEL(
fmin, KPS, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {} fmin, KPS, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {}
PD_REGISTER_KERNEL(add_raw,
KPS,
ALL_LAYOUT,
phi::AddRawKernel,
float,
double,
int16_t,
int,
int64_t,
float16,
bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(subtract_raw,
KPS,
ALL_LAYOUT,
phi::SubtractRawKernel,
float,
double,
int16_t,
int,
int64_t,
float16,
bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(divide_raw,
KPS,
ALL_LAYOUT,
phi::DivideRawKernel,
float,
double,
int,
int64_t,
float16,
bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(multiply_raw,
KPS,
ALL_LAYOUT,
phi::MultiplyRawKernel,
float,
double,
int,
int64_t,
bool,
float16,
complex64,
complex128,
bfloat16) {}
PD_REGISTER_KERNEL(maximum_raw, PD_REGISTER_KERNEL(maximum_raw,
KPS, KPS,
ALL_LAYOUT, ALL_LAYOUT,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#ifndef PADDLE_WITH_XPU_KP
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#endif
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
// Create the definition of Multiply
DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
template <typename T, typename Context>
void MultiplyKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
MultiplyRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL(
multiply_raw, KPS, ALL_LAYOUT, phi::MultiplyRawKernel, float) {}
#else
using float16 = phi::dtype::float16;
using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(multiply_raw,
KPS,
ALL_LAYOUT,
phi::MultiplyRawKernel,
float,
double,
int,
int64_t,
bool,
float16,
complex64,
complex128,
bfloat16) {}
PD_REGISTER_KERNEL(multiply,
KPS,
ALL_LAYOUT,
phi::MultiplyKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::float16,
phi::dtype::bfloat16,
complex64,
complex128) {}
#endif
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#ifndef PADDLE_WITH_XPU_KP
#include "paddle/phi/common/complex.h"
#include "paddle/phi/common/float16.h"
#endif
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
namespace phi {
// Create the definition of Subtract
DEFINE_CUDA_ELEMENTWISE_OP(Subtract)
template <typename T, typename Context>
void SubtractKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
int axis = -1;
SubtractRawKernel<T>(dev_ctx, x, y, axis, out);
}
} // namespace phi
#ifdef PADDLE_WITH_XPU_KP
PD_REGISTER_KERNEL(
subtract_raw, KPS, ALL_LAYOUT, phi::SubtractRawKernel, float) {}
#else
using float16 = phi::dtype::float16;
using bfloat16 = phi::dtype::bfloat16;
using complex64 = ::phi::dtype::complex<float>;
using complex128 = ::phi::dtype::complex<double>;
PD_REGISTER_KERNEL(subtract_raw,
KPS,
ALL_LAYOUT,
phi::SubtractRawKernel,
float,
double,
int16_t,
int,
int64_t,
float16,
bfloat16,
complex64,
complex128) {}
PD_REGISTER_KERNEL(subtract,
KPS,
ALL_LAYOUT,
phi::SubtractKernel,
float,
double,
int16_t,
int,
int64_t,
phi::dtype::float16,
complex64,
complex128,
phi::dtype::bfloat16) {}
#endif
...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ...@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include "paddle/phi/kernels/selected_rows/elementwise_kernel.h" #include "paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/bfloat16.h"
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/phi/common/float16.h" #include "paddle/phi/common/float16.h"
#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/enforce.h"
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h"
namespace phi { namespace phi {
namespace sr { namespace sr {
......
...@@ -22,6 +22,9 @@ limitations under the License. */ ...@@ -22,6 +22,9 @@ limitations under the License. */
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/kernel_registry.h"
PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(subtract, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(multiply, CPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(divide, CPU, ALL_LAYOUT);
namespace paddle { namespace paddle {
namespace tests { namespace tests {
......
...@@ -16,7 +16,10 @@ limitations under the License. */ ...@@ -16,7 +16,10 @@ limitations under the License. */
#include <memory> #include <memory>
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/elementwise_add_kernel.h"
#include "paddle/phi/kernels/elementwise_divide_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/api/lib/utils/allocator.h"
......
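Putting the split headers together, a test helper might exercise all four operations in one place; this is a sketch under the assumption that Add, Multiply, and Divide expose the same convenience-wrapper pattern shown for Subtract earlier in this diff:
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/kernels/elementwise_add_kernel.h"
#include "paddle/phi/kernels/elementwise_divide_kernel.h"
#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
// Hypothetical helper; wrapper availability for Add/Multiply/Divide
// is assumed, mirroring phi::Subtract.
void ExerciseSplitKernels(const phi::CPUContext& dev_ctx,
                          const phi::DenseTensor& x,
                          const phi::DenseTensor& y) {
  auto sum  = phi::Add<float>(dev_ctx, x, y);       // x + y
  auto diff = phi::Subtract<float>(dev_ctx, x, y);  // x - y
  auto prod = phi::Multiply<float>(dev_ctx, x, y);  // x * y
  auto quot = phi::Divide<float>(dev_ctx, x, y);    // x / y
}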