From 2cb19d8fdc9651c2d3f9250163412b1a063c11aa Mon Sep 17 00:00:00 2001
From: YuanRisheng
Date: Tue, 19 Apr 2022 11:53:16 +0800
Subject: [PATCH] [Phi]Separate AddKernel/DivideKernel/SubtractKernel/MultiplyKernel
 from ElementwiseKernel（Part1） (#41806)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* separate add/div/sub/mul from elementwise

* delete code

* fix compile bugs

* deal with conflict

* fix bugs when compile

* fix windows unit test bug

* fix ci coverage bugs
---
 .../performance_tests/benchmark_eager_cuda.cc |   2 +-
 .../performance_tests/benchmark_fluid_cuda.cc |   2 +-
 .../tests/task_tests/fwd_bwd_joint_test.cc    |   2 +-
 .../new_executor/standalone_executor_test.cc  |   9 +-
 paddle/fluid/imperative/tests/test_tracer.cc  |   1 +
 .../cinn/cinn_instruction_run_op_test.cc      |   2 +-
 .../operators/cinn/cinn_launch_op_test.cc     |   2 +-
 paddle/fluid/operators/determinant_op.h       |   2 +-
 paddle/fluid/operators/eig_op.h               |   4 +-
 .../elementwise/elementwise_add_op.h          |   4 +-
 .../test_elementwise_add_op_inplace.cc        |   3 +
 paddle/fluid/operators/feed_forward_test.cu   |   1 +
 paddle/fluid/operators/lu_op.h                |   3 +-
 .../cpu/elementwise_add_grad_kernel.cc        | 121 +++++++++
 .../phi/kernels/cpu/elementwise_add_kernel.cc |  67 +++++
 .../cpu/elementwise_divide_grad_kernel.cc     |  62 +++++
 .../kernels/cpu/elementwise_divide_kernel.cc  |  85 ++++++
 .../kernels/cpu/elementwise_grad_kernel.cc    | 235 ----------------
 paddle/phi/kernels/cpu/elementwise_kernel.cc  | 101 -------
 .../cpu/elementwise_multiply_grad_kernel.cc   |  79 ++++++
 .../cpu/elementwise_multiply_kernel.cc        |  69 +++++
 .../cpu/elementwise_subtract_grad_kernel.cc   |  75 ++++++
 .../cpu/elementwise_subtract_kernel.cc        |  68 +++++
 .../phi/kernels/cpu/matrix_rank_tol_kernel.cc |   2 +-
 .../phi/kernels/elementwise_add_grad_kernel.h |  49 ++++
 paddle/phi/kernels/elementwise_add_kernel.h   |  45 ++++
 .../kernels/elementwise_divide_grad_kernel.h  |  44 +++
 .../phi/kernels/elementwise_divide_kernel.h   |  46 ++++
 paddle/phi/kernels/elementwise_grad_kernel.h  | 105 --------
 paddle/phi/kernels/elementwise_kernel.cc      | 132 ---------
 paddle/phi/kernels/elementwise_kernel.h       |  96 -------
 .../elementwise_multiply_grad_kernel.h        |  60 +++++
 .../phi/kernels/elementwise_multiply_kernel.h |  46 ++++
 .../elementwise_subtract_grad_kernel.h        |  39 +++
 .../phi/kernels/elementwise_subtract_kernel.h |  46 ++++
 .../gpu/elementwise_add_grad_kernel.cu        | 118 ++++++++
 .../gpu/elementwise_divide_grad_kernel.cu     |  86 ++++++
 .../kernels/gpu/elementwise_grad_kernel.cu    | 254 ------------------
 .../gpu/elementwise_multiply_grad_kernel.cu   |  82 ++++++
 .../gpu/elementwise_subtract_grad_kernel.cu   |  83 ++++++
 .../phi/kernels/gpu/matrix_rank_tol_kernel.cu |   2 +-
 .../impl/cholesky_solve_grad_kernel_impl.h    |   2 +-
 .../impl/determinant_grad_kernel_impl.h       |   2 +-
 .../phi/kernels/impl/eigh_grad_kernel_impl.h  |   4 +-
 .../kernels/impl/elementwise_kernel_impl.h    |  42 +++
 .../phi/kernels/kps/elementwise_add_kernel.cu |  73 +++++
 .../kernels/kps/elementwise_divide_kernel.cu  |  73 +++++
 paddle/phi/kernels/kps/elementwise_kernel.cu  |  85 ------
 .../kps/elementwise_multiply_kernel.cu        |  75 ++++++
 .../kps/elementwise_subtract_kernel.cu        |  75 ++++++
 ...rnel.cc => elementwise_multiply_kernel.cc} |   4 +-
 ...kernel.h => elementwise_multiply_kernel.h} |   0
 paddle/phi/tests/api/test_elementwise_api.cc  |   3 +
 .../tests/kernels/test_elementwise_dev_api.cc |   5 +-
 54 files changed, 1748 insertions(+), 1029 deletions(-)
 create mode 100644 paddle/phi/kernels/cpu/elementwise_add_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/elementwise_add_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/elementwise_divide_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc
 create mode 100644 paddle/phi/kernels/elementwise_add_grad_kernel.h
 create mode 100644 paddle/phi/kernels/elementwise_add_kernel.h
 create mode 100644 paddle/phi/kernels/elementwise_divide_grad_kernel.h
 create mode 100644 paddle/phi/kernels/elementwise_divide_kernel.h
 create mode 100644 paddle/phi/kernels/elementwise_multiply_grad_kernel.h
 create mode 100644 paddle/phi/kernels/elementwise_multiply_kernel.h
 create mode 100644 paddle/phi/kernels/elementwise_subtract_grad_kernel.h
 create mode 100644 paddle/phi/kernels/elementwise_subtract_kernel.h
 create mode 100644 paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/elementwise_divide_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/elementwise_multiply_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/kps/elementwise_add_kernel.cu
 create mode 100644 paddle/phi/kernels/kps/elementwise_divide_kernel.cu
 create mode 100644 paddle/phi/kernels/kps/elementwise_multiply_kernel.cu
 create mode 100644 paddle/phi/kernels/kps/elementwise_subtract_kernel.cu
 rename paddle/phi/kernels/selected_rows/{elementwise_kernel.cc => elementwise_multiply_kernel.cc} (96%)
 rename paddle/phi/kernels/selected_rows/{elementwise_kernel.h => elementwise_multiply_kernel.h} (100%)

diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
index 5e79038981..287d6e770d 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc
@@ -42,7 +42,7 @@ using namespace egr_utils_api; // NOLINT
 PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(matmul_grad, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
 PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(sum, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT);
diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
index a3e393b039..d9afd7cc96 100644
--- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
+++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc
@@ -41,7 +41,7 @@
 PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(matmul_grad, GPU, ALL_LAYOUT);
-PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT);
+PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT);
 PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(sum, GPU, ALL_LAYOUT);
 PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT);
diff --git a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc
index d2bef100ca..2d69380cf7 100644
---
a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc +++ b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc @@ -36,7 +36,7 @@ PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif namespace egr { diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 5efd0fb420..e03277fb31 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -69,14 +69,17 @@ PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(concat, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(concat_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add_raw, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(multiply, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(multiply_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(divide, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(maximum, GPU, ALL_LAYOUT); #ifdef PADDLE_WITH_XPU_KP -PD_DECLARE_KERNEL(add_raw, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT); #else -PD_DECLARE_KERNEL(add_raw, KPS, ALL_LAYOUT); PD_DECLARE_KERNEL(max_raw, KPS, ALL_LAYOUT); #endif -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(mean_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sigmoid, GPU, ALL_LAYOUT); diff --git a/paddle/fluid/imperative/tests/test_tracer.cc b/paddle/fluid/imperative/tests/test_tracer.cc index 1c3a04b51a..7bfb3094ba 100644 --- a/paddle/fluid/imperative/tests/test_tracer.cc +++ b/paddle/fluid/imperative/tests/test_tracer.cc @@ -38,6 +38,7 @@ PD_DECLARE_KERNEL(matmul_with_flatten, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_with_flatten_grad, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_with_flatten, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_with_flatten_grad, GPU, ALL_LAYOUT); diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc index 358d0fc6d0..68bc3a0eb5 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc @@ -30,7 +30,7 @@ USE_OP_ITSELF(elementwise_add); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #ifdef PADDLE_WITH_CUDA -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif namespace paddle::operators { diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc index 3e363c56eb..4f922945ea 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc @@ -36,7 +36,7 @@ DECLARE_bool(enable_pe_launch_cinn); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #ifdef PADDLE_WITH_CUDA -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif namespace paddle::operators { diff --git a/paddle/fluid/operators/determinant_op.h b/paddle/fluid/operators/determinant_op.h index a1fe8a2566..702ff3bfd8 100644 --- a/paddle/fluid/operators/determinant_op.h +++ 
b/paddle/fluid/operators/determinant_op.h @@ -22,7 +22,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/funcs/diag_functor.h" diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index 6daf05a9d7..fe898a6c41 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -21,7 +21,9 @@ #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_divide_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" +#include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/diag_functor.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index c28abb916b..d77d4ed036 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -26,8 +26,8 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op.h" // only can include the headers in paddle/phi/include dirs -#include "paddle/phi/kernels/elementwise_grad_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_grad_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc index ce5c6b701d..f28aae9eed 100644 --- a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc +++ b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc @@ -26,6 +26,9 @@ USE_OP_ITSELF(elementwise_add); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); +#endif namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/feed_forward_test.cu b/paddle/fluid/operators/feed_forward_test.cu index e5ebdad1e4..61b80219a2 100644 --- a/paddle/fluid/operators/feed_forward_test.cu +++ b/paddle/fluid/operators/feed_forward_test.cu @@ -33,6 +33,7 @@ USE_OP_ITSELF(elementwise_add); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif // get paddle matmul op results as baseline diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index d6170b7000..8ef3d60c0d 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -18,7 +18,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/operators/set_value_op.h" #include "paddle/fluid/operators/svd_helper.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" +#include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" #include "paddle/phi/kernels/funcs/tril_triu_compute.h" #include "paddle/phi/kernels/triangular_solve_kernel.h" diff --git a/paddle/phi/kernels/cpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_add_grad_kernel.cc new file mode 100644 index 0000000000..f8a89b997b --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_add_grad_kernel.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/elementwise_add_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void AddGradFunc(const CPUContext& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy, + int axis = -1) { + if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { + ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); + } else { + ElemwiseExplicitGradCompute, IdentityGrad>( + dev_ctx, + x, + y, + out, + dout, + axis, + dx, + dy, + IdentityGrad(), + IdentityGrad()); + } +} + +template +void AddGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); +} + +template +void AddDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* ddout) { + phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +template +void AddTripleGradKernel(const Context& dev_ctx, + const DenseTensor& ddx, + const DenseTensor& ddy, + const DenseTensor& d_ddout, + int axis, + DenseTensor* d_ddx, + DenseTensor* d_ddy) { + phi::AddGradImpl( + dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); +} + +} // namespace phi + +PD_REGISTER_KERNEL(add_grad, + CPU, + ALL_LAYOUT, + phi::AddGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_double_grad, + CPU, + ALL_LAYOUT, + phi::AddDoubleGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_triple_grad, + CPU, + ALL_LAYOUT, + phi::AddTripleGradKernel, + float, + double, + int16_t, + 
int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc new file mode 100644 index 0000000000..6070264547 --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Add +DEFINE_CPU_ELEMENTWISE_OP(Add) + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + AddRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(add_raw, + CPU, + ALL_LAYOUT, + phi::AddRawKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128) {} + +PD_REGISTER_KERNEL(add, + CPU, + ALL_LAYOUT, + phi::AddKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128) {} diff --git a/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc new file mode 100644 index 0000000000..b6541ec0e6 --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void DivideGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + phi::funcs::ElemwiseGradCompute, DivGradDY>( + dev_ctx, x, y, out, dout, axis, dx, dy, DivGradDX(), DivGradDY()); +} + +} // namespace phi + +PD_REGISTER_KERNEL(divide_grad, + CPU, + ALL_LAYOUT, + phi::DivideGradKernel, + float, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(divide_double_grad, + CPU, + ALL_LAYOUT, + phi::DivideDoubleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc new file mode 100644 index 0000000000..d380621818 --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +template +void DivideRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + dev_ctx.template Alloc(out); + if (x.dims() == y.dims() && std::is_floating_point::value) { + SameDimsElementwiseCompute>()( + dev_ctx, x, y, out); + } else { + auto x_dims = x.dims(); + auto y_dims = y.dims(); + if (x_dims.size() >= y_dims.size()) { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::DivideFunctor(), out); + } else { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::InverseDivideFunctor(), out); + } + } +} + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + DivideRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(divide_raw, + CPU, + ALL_LAYOUT, + phi::DivideRawKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PD_REGISTER_KERNEL(divide, + CPU, + ALL_LAYOUT, + phi::DivideKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} diff --git a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc index f452d9ffb7..3f5e0b8a4d 100644 --- a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc @@ -23,118 +23,6 @@ namespace phi { -template -void AddGradFunc(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - DenseTensor* dx, - DenseTensor* dy, - int axis = -1) { - if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { - ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); - } else { - ElemwiseExplicitGradCompute, IdentityGrad>( - dev_ctx, - x, - y, - out, - dout, - axis, - dx, - dy, - IdentityGrad(), - IdentityGrad()); - } -} - -template -void AddGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); -} - -template -void AddDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* ddout) { - phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void AddTripleGradKernel(const Context& dev_ctx, - const DenseTensor& ddx, - const DenseTensor& ddy, - const DenseTensor& d_ddout, - int axis, - DenseTensor* d_ddx, - DenseTensor* d_ddy) { - phi::AddGradImpl( - dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); -} - -template -void SubtractGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - // skip out - auto* out = &dout; - ElementwiseSubGrad(dev_ctx, x, y, *out, dout, 
dx, dy, axis); -} - -template -void SubtractDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& dout, - int axis, - DenseTensor* ddout) { - phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void DivideGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - funcs::ElementwiseGradPreProcess(dout, dx); - phi::funcs::ElemwiseGradCompute, DivGradDY>( - dev_ctx, x, y, out, dout, axis, dx, dy, DivGradDX(), DivGradDY()); -} - -template -void MultiplyGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - funcs::ElementwiseGradPreProcess(dout, dx); - auto* out = &dout; // out is not necessary - phi::funcs::ElemwiseGradCompute, MulGradDY>( - dev_ctx, x, y, *out, dout, axis, dx, dy, MulGradDX(), MulGradDY()); -} - template void MaximumGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -163,129 +51,6 @@ void MinimumGradKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL(add_grad, - CPU, - ALL_LAYOUT, - phi::AddGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_double_grad, - CPU, - ALL_LAYOUT, - phi::AddDoubleGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_triple_grad, - CPU, - ALL_LAYOUT, - phi::AddTripleGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_grad, - CPU, - ALL_LAYOUT, - phi::SubtractGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_double_grad, - CPU, - ALL_LAYOUT, - phi::SubtractDoubleGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_grad, - CPU, - ALL_LAYOUT, - phi::DivideGradKernel, - float, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_double_grad, - CPU, - ALL_LAYOUT, - phi::DivideDoubleGradKernel, - float, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_grad, - CPU, - ALL_LAYOUT, - phi::MultiplyGradKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_double_grad, - CPU, - ALL_LAYOUT, - phi::MultiplyDoubleGradKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_triple_grad, - CPU, - ALL_LAYOUT, - phi::MultiplyTripleGradKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - PD_REGISTER_KERNEL(fmax_grad, CPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/cpu/elementwise_kernel.cc b/paddle/phi/kernels/cpu/elementwise_kernel.cc index a91ca1ee32..7478f69d91 100644 --- a/paddle/phi/kernels/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_kernel.cc @@ -21,54 +21,6 @@ namespace phi { -#define DEFINE_CPU_ELEMENTWISE_OP(name) \ 
- template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - dev_ctx.template Alloc(out); \ - if (x.dims() == y.dims()) { \ - SameDimsElementwiseCompute>()( \ - dev_ctx, x, y, out); \ - } else { \ - auto x_dims = x.dims(); \ - auto y_dims = y.dims(); \ - if (x_dims.size() >= y_dims.size()) { \ - funcs::ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, funcs::name##Functor(), out); \ - } else { \ - funcs::ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, funcs::Inverse##name##Functor(), out); \ - } \ - } \ - } - -template -void DivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - if (x.dims() == y.dims() && std::is_floating_point::value) { - SameDimsElementwiseCompute>()( - dev_ctx, x, y, out); - } else { - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::DivideFunctor(), out); - } else { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::InverseDivideFunctor(), out); - } - } -} - template void MaximumRawKernel(const Context& dev_ctx, const DenseTensor& x, @@ -142,14 +94,6 @@ void ElementwisePowRawKernel(const Context& dev_ctx, funcs::ElementwiseCompute, T>( dev_ctx, x, y, axis, funcs::ElementwisePowFunctor(), out); } -// Create the definition of Add -DEFINE_CPU_ELEMENTWISE_OP(Add) - -// Create the definition of Subtract -DEFINE_CPU_ELEMENTWISE_OP(Subtract) - -// Create the definition of Multiply -DEFINE_CPU_ELEMENTWISE_OP(Multiply) } // namespace phi @@ -165,51 +109,6 @@ PD_REGISTER_KERNEL( PD_REGISTER_KERNEL( fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {} -PD_REGISTER_KERNEL(add_raw, - CPU, - ALL_LAYOUT, - phi::AddRawKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract_raw, - CPU, - ALL_LAYOUT, - phi::SubtractRawKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(divide_raw, - CPU, - ALL_LAYOUT, - phi::DivideRawKernel, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply_raw, - CPU, - ALL_LAYOUT, - phi::MultiplyRawKernel, - float, - double, - int, - int64_t, - bool, - complex64, - complex128, - phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(maximum_raw, CPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc new file mode 100644 index 0000000000..6055541c80 --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void MultiplyGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + auto* out = &dout; // out is not necessary + phi::funcs::ElemwiseGradCompute, MulGradDY>( + dev_ctx, x, y, *out, dout, axis, dx, dy, MulGradDX(), MulGradDY()); +} + +} // namespace phi + +PD_REGISTER_KERNEL(multiply_grad, + CPU, + ALL_LAYOUT, + phi::MultiplyGradKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_double_grad, + CPU, + ALL_LAYOUT, + phi::MultiplyDoubleGradKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_triple_grad, + CPU, + ALL_LAYOUT, + phi::MultiplyTripleGradKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc b/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc new file mode 100644 index 0000000000..2424a53301 --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Multiply +DEFINE_CPU_ELEMENTWISE_OP(Multiply) + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MultiplyRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(multiply_raw, + CPU, + ALL_LAYOUT, + phi::MultiplyRawKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(multiply, + CPU, + ALL_LAYOUT, + phi::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc new file mode 100644 index 0000000000..c785eacb9a --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_subtract_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void SubtractGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + // skip out + auto* out = &dout; + ElementwiseSubGrad(dev_ctx, x, y, *out, dout, dx, dy, axis); +} + +template +void SubtractDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& dout, + int axis, + DenseTensor* ddout) { + phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +} // namespace phi + +PD_REGISTER_KERNEL(subtract_grad, + CPU, + ALL_LAYOUT, + phi::SubtractGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(subtract_double_grad, + CPU, + ALL_LAYOUT, + phi::SubtractDoubleGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc b/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc new file mode 100644 index 0000000000..0e97852ac3 --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Subtract +DEFINE_CPU_ELEMENTWISE_OP(Subtract) + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + SubtractRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(subtract_raw, + CPU, + ALL_LAYOUT, + phi::SubtractRawKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(subtract, + CPU, + ALL_LAYOUT, + phi::SubtractKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc index 77c7631710..3bfc07319e 100644 --- a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc @@ -17,7 +17,7 @@ #include #include #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/compare_functors.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/elementwise_add_grad_kernel.h b/paddle/phi/kernels/elementwise_add_grad_kernel.h new file mode 100644 index 0000000000..9b754cfefe --- /dev/null +++ b/paddle/phi/kernels/elementwise_add_grad_kernel.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { + +template +void AddGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void AddDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* ddout); + +template +void AddTripleGradKernel(const Context& dev_ctx, + const DenseTensor& ddx, + const DenseTensor& ddy, + const DenseTensor& d_ddout, + int axis, + DenseTensor* d_ddx, + DenseTensor* d_ddy); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_add_kernel.h b/paddle/phi/kernels/elementwise_add_kernel.h new file mode 100644 index 0000000000..3245c450aa --- /dev/null +++ b/paddle/phi/kernels/elementwise_add_kernel.h @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { +template +void AddRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Add(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + AddKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_divide_grad_kernel.h b/paddle/phi/kernels/elementwise_divide_grad_kernel.h new file mode 100644 index 0000000000..6d29dae99a --- /dev/null +++ b/paddle/phi/kernels/elementwise_divide_grad_kernel.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { + +template +void DivideGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void DivideDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dx, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* dy, + DenseTensor* dout, + DenseTensor* ddout); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_divide_kernel.h b/paddle/phi/kernels/elementwise_divide_kernel.h new file mode 100644 index 0000000000..5555b69fde --- /dev/null +++ b/paddle/phi/kernels/elementwise_divide_kernel.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { + +template +void DivideRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Divide(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + DivideKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_grad_kernel.h b/paddle/phi/kernels/elementwise_grad_kernel.h index 0e730fbfbf..6f2f2915ec 100644 --- a/paddle/phi/kernels/elementwise_grad_kernel.h +++ b/paddle/phi/kernels/elementwise_grad_kernel.h @@ -19,111 +19,6 @@ limitations under the License. 
*/ namespace phi { -template -void AddGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void AddDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* ddout); - -template -void AddTripleGradKernel(const Context& dev_ctx, - const DenseTensor& ddx, - const DenseTensor& ddy, - const DenseTensor& d_ddout, - int axis, - DenseTensor* d_ddx, - DenseTensor* d_ddy); - -template -void SubtractGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void SubtractDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& dout, - int axis, - DenseTensor* ddout); - -template -void DivideGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void DivideDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dx, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* dy, - DenseTensor* dout, - DenseTensor* ddout); - -template -void MultiplyGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void MultiplyDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* dx, - DenseTensor* dy, - DenseTensor* ddout); - -template -void MultiplyTripleGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& d_dx, - const DenseTensor& d_dy, - paddle::optional d_ddout, - int axis, - DenseTensor* d_x, - DenseTensor* d_y, - DenseTensor* d_dout, - DenseTensor* d_ddx, - DenseTensor* d_ddy); - template void ElementwiseFMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/elementwise_kernel.cc b/paddle/phi/kernels/elementwise_kernel.cc index 6cd602e47b..4cee24d2f8 100644 --- a/paddle/phi/kernels/elementwise_kernel.cc +++ b/paddle/phi/kernels/elementwise_kernel.cc @@ -19,42 +19,6 @@ namespace phi { -template -void AddKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - AddRawKernel(dev_ctx, x, y, axis, out); -} - -template -void SubtractKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - SubtractRawKernel(dev_ctx, x, y, axis, out); -} - -template -void DivideKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - DivideRawKernel(dev_ctx, x, y, axis, out); -} - -template -void MultiplyKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - MultiplyRawKernel(dev_ctx, x, y, axis, out); -} - template void MaximumKernel(const Context& dev_ctx, const DenseTensor& x, @@ -105,51 +69,6 @@ void ElementwisePowKernel(const Context& 
dev_ctx, using complex64 = ::phi::dtype::complex; using complex128 = ::phi::dtype::complex; -PD_REGISTER_KERNEL(add, - CPU, - ALL_LAYOUT, - phi::AddKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract, - CPU, - ALL_LAYOUT, - phi::SubtractKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(divide, - CPU, - ALL_LAYOUT, - phi::DivideKernel, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply, - CPU, - ALL_LAYOUT, - phi::MultiplyKernel, - float, - double, - int, - int64_t, - bool, - complex64, - complex128, - phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(maximum, CPU, ALL_LAYOUT, @@ -183,57 +102,6 @@ PD_REGISTER_KERNEL(elementwise_pow, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_REGISTER_KERNEL(add, - GPU, - ALL_LAYOUT, - phi::AddKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract, - GPU, - ALL_LAYOUT, - phi::SubtractKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::float16, - complex64, - complex128, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(divide, - GPU, - ALL_LAYOUT, - phi::DivideKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply, - GPU, - ALL_LAYOUT, - phi::MultiplyKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::float16, - phi::dtype::bfloat16, - complex64, - complex128) {} PD_REGISTER_KERNEL(maximum, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/elementwise_kernel.h b/paddle/phi/kernels/elementwise_kernel.h index 09b6b02e37..37fe895d40 100644 --- a/paddle/phi/kernels/elementwise_kernel.h +++ b/paddle/phi/kernels/elementwise_kernel.h @@ -33,58 +33,6 @@ void FMinKernel(const Context& dev_ctx, int axis, DenseTensor* out); -template -void AddRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void AddKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void SubtractRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void SubtractKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void DivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void DivideKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void MultiplyRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void MultiplyKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - template void MaximumRawKernel(const Context& dev_ctx, const DenseTensor& x, @@ -150,50 +98,6 @@ void ElementwisePowKernel(const Context& dev_ctx, const DenseTensor& y, DenseTensor* out); -template -DenseTensor Add(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - AddKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Subtract(const 
Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - SubtractKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Divide(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - DivideKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Multiply(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - MultiplyKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - template DenseTensor Maximum(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/elementwise_multiply_grad_kernel.h b/paddle/phi/kernels/elementwise_multiply_grad_kernel.h new file mode 100644 index 0000000000..517948a50d --- /dev/null +++ b/paddle/phi/kernels/elementwise_multiply_grad_kernel.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { + +template +void MultiplyGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void MultiplyDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* dx, + DenseTensor* dy, + DenseTensor* ddout); + +template +void MultiplyTripleGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& d_dx, + const DenseTensor& d_dy, + paddle::optional d_ddout, + int axis, + DenseTensor* d_x, + DenseTensor* d_y, + DenseTensor* d_dout, + DenseTensor* d_ddx, + DenseTensor* d_ddy); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_multiply_kernel.h b/paddle/phi/kernels/elementwise_multiply_kernel.h new file mode 100644 index 0000000000..608ae95d2b --- /dev/null +++ b/paddle/phi/kernels/elementwise_multiply_kernel.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { + +template +void MultiplyRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Multiply(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + MultiplyKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_subtract_grad_kernel.h b/paddle/phi/kernels/elementwise_subtract_grad_kernel.h new file mode 100644 index 0000000000..7be91b4b9f --- /dev/null +++ b/paddle/phi/kernels/elementwise_subtract_grad_kernel.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { +template +void SubtractGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void SubtractDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& dout, + int axis, + DenseTensor* ddout); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_subtract_kernel.h b/paddle/phi/kernels/elementwise_subtract_kernel.h new file mode 100644 index 0000000000..1f6c4383df --- /dev/null +++ b/paddle/phi/kernels/elementwise_subtract_kernel.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { + +template +void SubtractRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Subtract(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + SubtractKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu new file mode 100644 index 0000000000..8dd4d0184c --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu @@ -0,0 +1,118 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/elementwise_add_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void AddGradFunc(const GPUContext& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy, + int axis = -1) { + if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { + ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); + } else { + DefaultElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy, axis); + } +} + +template +void AddGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); +} + +template +void AddDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* ddout) { + phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +template +void AddTripleGradKernel(const Context& dev_ctx, + const DenseTensor& ddx, + const DenseTensor& ddy, + const DenseTensor& d_ddout, + int axis, + DenseTensor* d_ddx, + DenseTensor* d_ddy) { + phi::AddGradImpl( + dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); +} + +} // namespace phi + +PD_REGISTER_KERNEL(add_grad, + GPU, + ALL_LAYOUT, + phi::AddGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + 
phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_double_grad, + GPU, + ALL_LAYOUT, + phi::AddDoubleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_triple_grad, + GPU, + ALL_LAYOUT, + phi::AddTripleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/elementwise_divide_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_divide_grad_kernel.cu new file mode 100644 index 0000000000..57bf6da406 --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_divide_grad_kernel.cu @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void DivideGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + const auto place = dev_ctx.GetPlace(); + if (dx != nullptr && dy != nullptr) { + std::vector ins = {&dout, &out, &y}; + GetGradXAndYOut( + dev_ctx, + place, + axis, + ins, + dout, + dx, + dy, + funcs::DivGradXYFunctor()); + } else if (dx != nullptr && dy == nullptr) { + std::vector ins = {&dout, &y}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dx, funcs::DivGradXFunctor()); + } else if (dy != nullptr && dx == nullptr) { + std::vector ins = {&dout, &out, &y}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dy, funcs::DivGradYFunctor()); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(divide_grad, + GPU, + ALL_LAYOUT, + phi::DivideGradKernel, + float, + phi::dtype::float16, + phi::dtype::bfloat16, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(divide_double_grad, + GPU, + ALL_LAYOUT, + phi::DivideDoubleGradKernel, + float, + phi::dtype::float16, + phi::dtype::bfloat16, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu index fae7978d3d..c814e7b3bb 100644 --- a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu @@ -26,128 +26,6 @@ namespace phi { -template -void AddGradFunc(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - 
DenseTensor* dx, - DenseTensor* dy, - int axis = -1) { - if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { - ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); - } else { - DefaultElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy, axis); - } -} - -template -void AddGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); -} - -template -void AddDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* ddout) { - phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void AddTripleGradKernel(const Context& dev_ctx, - const DenseTensor& ddx, - const DenseTensor& ddy, - const DenseTensor& d_ddout, - int axis, - DenseTensor* d_ddx, - DenseTensor* d_ddy) { - phi::AddGradImpl( - dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); -} - -template -void SubtractGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - // skip out - auto* out = &dout; - if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { - elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy); - } else { - default_elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy, axis); - } -} - -template -void SubtractDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& dout, - int axis, - DenseTensor* ddout) { - phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void DivideGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - const auto place = dev_ctx.GetPlace(); - if (dx != nullptr && dy != nullptr) { - std::vector ins = {&dout, &out, &y}; - GetGradXAndYOut( - dev_ctx, - place, - axis, - ins, - dout, - dx, - dy, - funcs::DivGradXYFunctor()); - } else if (dx != nullptr && dy == nullptr) { - std::vector ins = {&dout, &y}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dx, funcs::DivGradXFunctor()); - } else if (dy != nullptr && dx == nullptr) { - std::vector ins = {&dout, &out, &y}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dy, funcs::DivGradYFunctor()); - } -} - -template -void MultiplyGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - funcs::ElementwiseGradPreProcess(dout, dx); - ElementwiseMulGrad(dev_ctx, x, y, dout, dx, dy, axis); -} - template void MaximumGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -211,138 +89,6 @@ void MinimumGradKernel(const Context& dev_ctx, } } // namespace phi -PD_REGISTER_KERNEL(add_grad, - GPU, - ALL_LAYOUT, - phi::AddGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_double_grad, - GPU, - ALL_LAYOUT, - phi::AddDoubleGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_triple_grad, - GPU, - ALL_LAYOUT, - phi::AddTripleGradKernel, - 
float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_grad, - GPU, - ALL_LAYOUT, - phi::SubtractGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_double_grad, - GPU, - ALL_LAYOUT, - phi::SubtractDoubleGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_grad, - GPU, - ALL_LAYOUT, - phi::DivideGradKernel, - float, - phi::dtype::float16, - phi::dtype::bfloat16, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_double_grad, - GPU, - ALL_LAYOUT, - phi::DivideDoubleGradKernel, - float, - phi::dtype::float16, - phi::dtype::bfloat16, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_grad, - GPU, - ALL_LAYOUT, - phi::MultiplyGradKernel, - float, - phi::dtype::float16, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_double_grad, - GPU, - ALL_LAYOUT, - phi::MultiplyDoubleGradKernel, - float, - phi::dtype::float16, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_triple_grad, - GPU, - ALL_LAYOUT, - phi::MultiplyTripleGradKernel, - float, - phi::dtype::float16, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} PD_REGISTER_KERNEL(fmax_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/gpu/elementwise_multiply_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_multiply_grad_kernel.cu new file mode 100644 index 0000000000..3442d7f028 --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_multiply_grad_kernel.cu @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void MultiplyGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + ElementwiseMulGrad(dev_ctx, x, y, dout, dx, dy, axis); +} + +} // namespace phi + +PD_REGISTER_KERNEL(multiply_grad, + GPU, + ALL_LAYOUT, + phi::MultiplyGradKernel, + float, + phi::dtype::float16, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_double_grad, + GPU, + ALL_LAYOUT, + phi::MultiplyDoubleGradKernel, + float, + phi::dtype::float16, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_triple_grad, + GPU, + ALL_LAYOUT, + phi::MultiplyTripleGradKernel, + float, + phi::dtype::float16, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu new file mode 100644 index 0000000000..20f3b73e40 --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void SubtractGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + // skip out + auto* out = &dout; + if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { + elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy); + } else { + default_elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy, axis); + } +} + +template +void SubtractDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& dout, + int axis, + DenseTensor* ddout) { + phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +} // namespace phi + +PD_REGISTER_KERNEL(subtract_grad, + GPU, + ALL_LAYOUT, + phi::SubtractGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(subtract_double_grad, + GPU, + ALL_LAYOUT, + phi::SubtractDoubleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu b/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu index be6cdc7825..84768866cc 100644 --- a/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu +++ b/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu @@ -23,7 +23,7 @@ #include "paddle/phi/backends/dynload/cusolver.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/abs_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/compare_functors.h" diff --git a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h index 8c37091ef1..371644e643 100644 --- a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h @@ -19,7 +19,7 @@ #include "paddle/phi/kernels/cholesky_solve_kernel.h" #include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/copy_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h index e4356e9af3..ab1c33d50a 100644 --- a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h @@ -17,7 +17,7 @@ #include "paddle/phi/kernels/determinant_grad_kernel.h" #include "paddle/phi/kernels/copy_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" 
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h index 5e06435b28..f39786fff2 100644 --- a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h @@ -16,7 +16,9 @@ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_divide_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" +#include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/phi/kernels/funcs/diag_functor.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/impl/elementwise_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_kernel_impl.h index d5c2c559b2..b126ca9b84 100644 --- a/paddle/phi/kernels/impl/elementwise_kernel_impl.h +++ b/paddle/phi/kernels/impl/elementwise_kernel_impl.h @@ -22,6 +22,48 @@ #endif namespace phi { + +#define DEFINE_CPU_ELEMENTWISE_OP(name) \ + template \ + void name##RawKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + dev_ctx.template Alloc(out); \ + if (x.dims() == y.dims()) { \ + SameDimsElementwiseCompute>()( \ + dev_ctx, x, y, out); \ + } else { \ + auto x_dims = x.dims(); \ + auto y_dims = y.dims(); \ + if (x_dims.size() >= y_dims.size()) { \ + funcs::ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, funcs::name##Functor(), out); \ + } else { \ + funcs::ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, funcs::Inverse##name##Functor(), out); \ + } \ + } \ + } + +#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ + template \ + void name##RawKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + std::vector inputs; \ + std::vector outputs; \ + inputs.emplace_back(&x); \ + inputs.emplace_back(&y); \ + outputs.emplace_back(out); \ + dev_ctx.template Alloc(out); \ + funcs::BroadcastKernel( \ + dev_ctx, inputs, &outputs, axis, funcs::name##Functor()); \ + } + template void FMaxKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/kps/elementwise_add_kernel.cu b/paddle/phi/kernels/kps/elementwise_add_kernel.cu new file mode 100644 index 0000000000..b5532c6143 --- /dev/null +++ b/paddle/phi/kernels/kps/elementwise_add_kernel.cu @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/backends/gpu/gpu_context.h" +#ifndef PADDLE_WITH_XPU_KP +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#endif +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +DEFINE_CUDA_ELEMENTWISE_OP(Add) + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + AddRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL(add_raw, KPS, ALL_LAYOUT, phi::AddRawKernel, float) {} +#else + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(add_raw, + KPS, + ALL_LAYOUT, + phi::AddRawKernel, + float, + double, + int16_t, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} +PD_REGISTER_KERNEL(add, + KPS, + ALL_LAYOUT, + phi::AddKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + complex64, + complex128) {} +#endif diff --git a/paddle/phi/kernels/kps/elementwise_divide_kernel.cu b/paddle/phi/kernels/kps/elementwise_divide_kernel.cu new file mode 100644 index 0000000000..852babe29d --- /dev/null +++ b/paddle/phi/kernels/kps/elementwise_divide_kernel.cu @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/backends/gpu/gpu_context.h" +#ifndef PADDLE_WITH_XPU_KP +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#endif +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Divide +DEFINE_CUDA_ELEMENTWISE_OP(Divide) + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + DivideRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL(divide_raw, KPS, ALL_LAYOUT, phi::DivideRawKernel, float) {} +#else + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(divide_raw, + KPS, + ALL_LAYOUT, + phi::DivideRawKernel, + float, + double, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} + +PD_REGISTER_KERNEL(divide, + KPS, + ALL_LAYOUT, + phi::DivideKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + complex64, + complex128) {} +#endif diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu index 01a34c0f85..5ccd3b1a48 100644 --- a/paddle/phi/kernels/kps/elementwise_kernel.cu +++ b/paddle/phi/kernels/kps/elementwise_kernel.cu @@ -22,34 +22,6 @@ namespace phi { -#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ - template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - std::vector inputs; \ - std::vector outputs; \ - inputs.emplace_back(&x); \ - inputs.emplace_back(&y); \ - outputs.emplace_back(out); \ - dev_ctx.template Alloc(out); \ - funcs::BroadcastKernel( \ - dev_ctx, inputs, &outputs, axis, funcs::name##Functor()); \ - } - -/** - * Kernels - */ -// Create the definition of Add -DEFINE_CUDA_ELEMENTWISE_OP(Add) -// Create the definition of Subtract -DEFINE_CUDA_ELEMENTWISE_OP(Subtract) -// Create the definition of Multiply -DEFINE_CUDA_ELEMENTWISE_OP(Multiply) -// Create the definition of Divide -DEFINE_CUDA_ELEMENTWISE_OP(Divide) // Create the definition of Maximum DEFINE_CUDA_ELEMENTWISE_OP(Maximum) // Create the definition of Minimum @@ -64,12 +36,6 @@ DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow) } // namespace phi #ifdef PADDLE_WITH_XPU_KP -PD_REGISTER_KERNEL(add_raw, KPS, ALL_LAYOUT, phi::AddRawKernel, float) {} -PD_REGISTER_KERNEL( - subtract_raw, KPS, ALL_LAYOUT, phi::SubtractRawKernel, float) {} -PD_REGISTER_KERNEL(divide_raw, KPS, ALL_LAYOUT, phi::DivideRawKernel, float) {} -PD_REGISTER_KERNEL( - multiply_raw, KPS, ALL_LAYOUT, phi::MultiplyRawKernel, float) {} PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, phi::MaximumRawKernel, float) { } PD_REGISTER_KERNEL(minimum_raw, KPS, ALL_LAYOUT, phi::MinimumRawKernel, float) { @@ -89,57 +55,6 @@ PD_REGISTER_KERNEL( PD_REGISTER_KERNEL( fmin, KPS, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {} -PD_REGISTER_KERNEL(add_raw, - KPS, - ALL_LAYOUT, - phi::AddRawKernel, - float, - double, - int16_t, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract_raw, - KPS, - ALL_LAYOUT, - phi::SubtractRawKernel, - float, - double, - int16_t, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(divide_raw, - KPS, - ALL_LAYOUT, - phi::DivideRawKernel, - 
float, - double, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply_raw, - KPS, - ALL_LAYOUT, - phi::MultiplyRawKernel, - float, - double, - int, - int64_t, - bool, - float16, - complex64, - complex128, - bfloat16) {} PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, diff --git a/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu b/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu new file mode 100644 index 0000000000..8bede0198c --- /dev/null +++ b/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/backends/gpu/gpu_context.h" +#ifndef PADDLE_WITH_XPU_KP +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#endif +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Multiply +DEFINE_CUDA_ELEMENTWISE_OP(Multiply) + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MultiplyRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL( + multiply_raw, KPS, ALL_LAYOUT, phi::MultiplyRawKernel, float) {} +#else + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(multiply_raw, + KPS, + ALL_LAYOUT, + phi::MultiplyRawKernel, + float, + double, + int, + int64_t, + bool, + float16, + complex64, + complex128, + bfloat16) {} +PD_REGISTER_KERNEL(multiply, + KPS, + ALL_LAYOUT, + phi::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::float16, + phi::dtype::bfloat16, + complex64, + complex128) {} +#endif diff --git a/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu b/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu new file mode 100644 index 0000000000..757dedb99c --- /dev/null +++ b/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/backends/gpu/gpu_context.h" +#ifndef PADDLE_WITH_XPU_KP +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#endif +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Subtract +DEFINE_CUDA_ELEMENTWISE_OP(Subtract) + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + SubtractRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL( + subtract_raw, KPS, ALL_LAYOUT, phi::SubtractRawKernel, float) {} +#else + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(subtract_raw, + KPS, + ALL_LAYOUT, + phi::SubtractRawKernel, + float, + double, + int16_t, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} +PD_REGISTER_KERNEL(subtract, + KPS, + ALL_LAYOUT, + phi::SubtractKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::float16, + complex64, + complex128, + phi::dtype::bfloat16) {} +#endif diff --git a/paddle/phi/kernels/selected_rows/elementwise_kernel.cc b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc similarity index 96% rename from paddle/phi/kernels/selected_rows/elementwise_kernel.cc rename to paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc index 7fba3244a6..9fe8eef7ec 100644 --- a/paddle/phi/kernels/selected_rows/elementwise_kernel.cc +++ b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/kernels/selected_rows/elementwise_kernel.h" +#include "paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" namespace phi { namespace sr { diff --git a/paddle/phi/kernels/selected_rows/elementwise_kernel.h b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h similarity index 100% rename from paddle/phi/kernels/selected_rows/elementwise_kernel.h rename to paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h diff --git a/paddle/phi/tests/api/test_elementwise_api.cc b/paddle/phi/tests/api/test_elementwise_api.cc index d4013a788c..fb4c68a87c 100644 --- a/paddle/phi/tests/api/test_elementwise_api.cc +++ b/paddle/phi/tests/api/test_elementwise_api.cc @@ -22,6 +22,9 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(subtract, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(multiply, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(divide, CPU, ALL_LAYOUT); namespace paddle { namespace tests { diff --git a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc index 9552c02976..36b200d4d4 100644 --- a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc @@ -16,7 +16,10 @@ limitations under the License. */ #include #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" +#include "paddle/phi/kernels/elementwise_divide_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" +#include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/phi/api/lib/utils/allocator.h" -- GitLab