diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc index 5e790389819f53b250db8797c7a8b3466818abfb..287d6e770dea2cce4387e8aebf53f2826f71725e 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc @@ -42,7 +42,7 @@ using namespace egr_utils_api; // NOLINT PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_grad, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sum, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc index a3e393b039425e506066b485bc8a8688bff20d96..d9afd7cc96523729833ecf3cd72456f07f761586 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc @@ -41,7 +41,7 @@ PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_grad, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sum, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc index d2bef100ca2b581f230816ca56ebeda97bfc8a0c..2d69380cf78d90c69d1e0760ca77d584708f284c 100644 --- a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc +++ b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc @@ -36,7 +36,7 @@ PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(full, GPU, ALL_LAYOUT); -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif namespace egr { diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 5efd0fb42077f0af79579a501fd3a3caaa9a25dd..e03277fb3179980baac980ba503c40b590309bc1 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -69,14 +69,17 @@ PD_DECLARE_KERNEL(split, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(concat, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(concat_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add_raw, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(multiply, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(multiply_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(divide, KPS, ALL_LAYOUT); +PD_DECLARE_KERNEL(maximum, GPU, ALL_LAYOUT); #ifdef PADDLE_WITH_XPU_KP -PD_DECLARE_KERNEL(add_raw, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(max_raw, GPU, ALL_LAYOUT); #else -PD_DECLARE_KERNEL(add_raw, KPS, ALL_LAYOUT); PD_DECLARE_KERNEL(max_raw, KPS, ALL_LAYOUT); #endif -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(mean_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(sigmoid, GPU, ALL_LAYOUT); diff --git a/paddle/fluid/imperative/tests/test_tracer.cc 
b/paddle/fluid/imperative/tests/test_tracer.cc index 1c3a04b51abd036325801af484bb1d800152c328..7bfb3094ba2869269e178d53ec9ed0a834ee9d29 100644 --- a/paddle/fluid/imperative/tests/test_tracer.cc +++ b/paddle/fluid/imperative/tests/test_tracer.cc @@ -38,6 +38,7 @@ PD_DECLARE_KERNEL(matmul_with_flatten, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_with_flatten_grad, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); PD_DECLARE_KERNEL(sum_grad, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_with_flatten, GPU, ALL_LAYOUT); PD_DECLARE_KERNEL(matmul_with_flatten_grad, GPU, ALL_LAYOUT); diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc index 358d0fc6d078e124e1629df3eee730d5d5d8b079..68bc3a0eb5c531628b62dc56b0080fb04ee57105 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc @@ -30,7 +30,7 @@ USE_OP_ITSELF(elementwise_add); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #ifdef PADDLE_WITH_CUDA -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif namespace paddle::operators { diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc index 3e363c56eb93e52f80375ac70bacd7b65c626144..4f922945eae00a7188d147e21e0bc6216008c39c 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc @@ -36,7 +36,7 @@ DECLARE_bool(enable_pe_launch_cinn); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #ifdef PADDLE_WITH_CUDA -PD_DECLARE_KERNEL(add, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif namespace paddle::operators { diff --git a/paddle/fluid/operators/determinant_op.h b/paddle/fluid/operators/determinant_op.h index a1fe8a25665ec84b38a535f541a2cbe33d0a7fcf..702ff3bfd87b0c439483afeca3761d460a8aa862 100644 --- a/paddle/fluid/operators/determinant_op.h +++ b/paddle/fluid/operators/determinant_op.h @@ -22,7 +22,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/funcs/diag_functor.h" diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index 6daf05a9d778dfb194225f59321ffc3eb40235db..fe898a6c41c2a57349ff09d2cc23b7aea2b66cb9 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -21,7 +21,9 @@ #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_divide_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" +#include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/diag_functor.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index 
c28abb916b7a7d59d5a1974bed63e43b2f32ef2c..d77d4ed036394e50f1cf6affcfa7045636df6ac0 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -26,8 +26,8 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op.h" // only can include the headers in paddle/phi/include dirs -#include "paddle/phi/kernels/elementwise_grad_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_grad_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc index ce5c6b701d95894db8e3a84215f537352914706a..f28aae9eed37bc2d0272db18648c55c4d9eb3513 100644 --- a/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc +++ b/paddle/fluid/operators/elementwise/test_elementwise_add_op_inplace.cc @@ -26,6 +26,9 @@ USE_OP_ITSELF(elementwise_add); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); +#endif namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/feed_forward_test.cu b/paddle/fluid/operators/feed_forward_test.cu index e5ebdad1e443476baee3f511dc446c40bb59dfd9..61b80219a26b4e352ed52919b69329a0052e9d92 100644 --- a/paddle/fluid/operators/feed_forward_test.cu +++ b/paddle/fluid/operators/feed_forward_test.cu @@ -33,6 +33,7 @@ USE_OP_ITSELF(elementwise_add); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) PD_DECLARE_KERNEL(add_grad, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(add, KPS, ALL_LAYOUT); #endif // get paddle matmul op results as baseline diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index d6170b7000d635fd306f82efd34074ca0945f476..8ef3d60c0dc0cc4035eeb6ed213a2abb726807eb 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -18,7 +18,8 @@ limitations under the License. */ #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/operators/set_value_op.h" #include "paddle/fluid/operators/svd_helper.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" +#include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" #include "paddle/phi/kernels/funcs/tril_triu_compute.h" #include "paddle/phi/kernels/triangular_solve_kernel.h" diff --git a/paddle/phi/kernels/cpu/elementwise_add_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_add_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..f8a89b997b41395a8e8da6bb6c4861b2eb974f29 --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_add_grad_kernel.cc @@ -0,0 +1,121 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/elementwise_add_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void AddGradFunc(const CPUContext& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy, + int axis = -1) { + if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { + ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); + } else { + ElemwiseExplicitGradCompute, IdentityGrad>( + dev_ctx, + x, + y, + out, + dout, + axis, + dx, + dy, + IdentityGrad(), + IdentityGrad()); + } +} + +template +void AddGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); +} + +template +void AddDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* ddout) { + phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +template +void AddTripleGradKernel(const Context& dev_ctx, + const DenseTensor& ddx, + const DenseTensor& ddy, + const DenseTensor& d_ddout, + int axis, + DenseTensor* d_ddx, + DenseTensor* d_ddy) { + phi::AddGradImpl( + dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); +} + +} // namespace phi + +PD_REGISTER_KERNEL(add_grad, + CPU, + ALL_LAYOUT, + phi::AddGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_double_grad, + CPU, + ALL_LAYOUT, + phi::AddDoubleGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_triple_grad, + CPU, + ALL_LAYOUT, + phi::AddTripleGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..607026454724937db841150bdf96d6ea8b976a4c --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Add +DEFINE_CPU_ELEMENTWISE_OP(Add) + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + AddRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(add_raw, + CPU, + ALL_LAYOUT, + phi::AddRawKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128) {} + +PD_REGISTER_KERNEL(add, + CPU, + ALL_LAYOUT, + phi::AddKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128) {} diff --git a/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..b6541ec0e681847d2d45da5ff70fba4075562d8e --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_divide_grad_kernel.cc @@ -0,0 +1,62 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void DivideGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + phi::funcs::ElemwiseGradCompute, DivGradDY>( + dev_ctx, x, y, out, dout, axis, dx, dy, DivGradDX(), DivGradDY()); +} + +} // namespace phi + +PD_REGISTER_KERNEL(divide_grad, + CPU, + ALL_LAYOUT, + phi::DivideGradKernel, + float, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(divide_double_grad, + CPU, + ALL_LAYOUT, + phi::DivideDoubleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..d380621818b3595f3df5f08653848ec60eb741ab --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +template +void DivideRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + dev_ctx.template Alloc(out); + if (x.dims() == y.dims() && std::is_floating_point::value) { + SameDimsElementwiseCompute>()( + dev_ctx, x, y, out); + } else { + auto x_dims = x.dims(); + auto y_dims = y.dims(); + if (x_dims.size() >= y_dims.size()) { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::DivideFunctor(), out); + } else { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::InverseDivideFunctor(), out); + } + } +} + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + DivideRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(divide_raw, + CPU, + ALL_LAYOUT, + phi::DivideRawKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PD_REGISTER_KERNEL(divide, + CPU, + ALL_LAYOUT, + phi::DivideKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} diff --git a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc index f452d9ffb7e8950ba2beec7b6cb1b7b09e746851..3f5e0b8a4d8ee6266e96afb2431e8ac38b68e3f9 100644 --- a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc @@ -23,118 +23,6 @@ namespace phi { -template -void AddGradFunc(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - DenseTensor* dx, - DenseTensor* dy, - int axis = -1) { - if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { - ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); - } else { - ElemwiseExplicitGradCompute, IdentityGrad>( - dev_ctx, - x, - y, - out, - dout, - axis, - dx, - dy, - IdentityGrad(), - IdentityGrad()); - } -} - -template -void AddGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); -} - -template -void AddDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* ddout) { - phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void AddTripleGradKernel(const Context& dev_ctx, - const DenseTensor& ddx, - const DenseTensor& ddy, - const DenseTensor& d_ddout, - int axis, - DenseTensor* d_ddx, - DenseTensor* d_ddy) { - phi::AddGradImpl( - dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); -} - -template -void SubtractGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - // skip out - auto* 
out = &dout; - ElementwiseSubGrad(dev_ctx, x, y, *out, dout, dx, dy, axis); -} - -template -void SubtractDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& dout, - int axis, - DenseTensor* ddout) { - phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void DivideGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - funcs::ElementwiseGradPreProcess(dout, dx); - phi::funcs::ElemwiseGradCompute, DivGradDY>( - dev_ctx, x, y, out, dout, axis, dx, dy, DivGradDX(), DivGradDY()); -} - -template -void MultiplyGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - funcs::ElementwiseGradPreProcess(dout, dx); - auto* out = &dout; // out is not necessary - phi::funcs::ElemwiseGradCompute, MulGradDY>( - dev_ctx, x, y, *out, dout, axis, dx, dy, MulGradDX(), MulGradDY()); -} - template void MaximumGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -163,129 +51,6 @@ void MinimumGradKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL(add_grad, - CPU, - ALL_LAYOUT, - phi::AddGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_double_grad, - CPU, - ALL_LAYOUT, - phi::AddDoubleGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_triple_grad, - CPU, - ALL_LAYOUT, - phi::AddTripleGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_grad, - CPU, - ALL_LAYOUT, - phi::SubtractGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_double_grad, - CPU, - ALL_LAYOUT, - phi::SubtractDoubleGradKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_grad, - CPU, - ALL_LAYOUT, - phi::DivideGradKernel, - float, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_double_grad, - CPU, - ALL_LAYOUT, - phi::DivideDoubleGradKernel, - float, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_grad, - CPU, - ALL_LAYOUT, - phi::MultiplyGradKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_double_grad, - CPU, - ALL_LAYOUT, - phi::MultiplyDoubleGradKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_triple_grad, - CPU, - ALL_LAYOUT, - phi::MultiplyTripleGradKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - PD_REGISTER_KERNEL(fmax_grad, CPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/cpu/elementwise_kernel.cc b/paddle/phi/kernels/cpu/elementwise_kernel.cc index a91ca1ee3244bdbdde2c9c248317e40d45b3dc17..7478f69d915f1fbfd83363b212da9d39f4718f30 100644 --- a/paddle/phi/kernels/cpu/elementwise_kernel.cc +++ 
b/paddle/phi/kernels/cpu/elementwise_kernel.cc @@ -21,54 +21,6 @@ namespace phi { -#define DEFINE_CPU_ELEMENTWISE_OP(name) \ - template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - dev_ctx.template Alloc(out); \ - if (x.dims() == y.dims()) { \ - SameDimsElementwiseCompute>()( \ - dev_ctx, x, y, out); \ - } else { \ - auto x_dims = x.dims(); \ - auto y_dims = y.dims(); \ - if (x_dims.size() >= y_dims.size()) { \ - funcs::ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, funcs::name##Functor(), out); \ - } else { \ - funcs::ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, funcs::Inverse##name##Functor(), out); \ - } \ - } \ - } - -template -void DivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - if (x.dims() == y.dims() && std::is_floating_point::value) { - SameDimsElementwiseCompute>()( - dev_ctx, x, y, out); - } else { - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::DivideFunctor(), out); - } else { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::InverseDivideFunctor(), out); - } - } -} - template void MaximumRawKernel(const Context& dev_ctx, const DenseTensor& x, @@ -142,14 +94,6 @@ void ElementwisePowRawKernel(const Context& dev_ctx, funcs::ElementwiseCompute, T>( dev_ctx, x, y, axis, funcs::ElementwisePowFunctor(), out); } -// Create the definition of Add -DEFINE_CPU_ELEMENTWISE_OP(Add) - -// Create the definition of Subtract -DEFINE_CPU_ELEMENTWISE_OP(Subtract) - -// Create the definition of Multiply -DEFINE_CPU_ELEMENTWISE_OP(Multiply) } // namespace phi @@ -165,51 +109,6 @@ PD_REGISTER_KERNEL( PD_REGISTER_KERNEL( fmin, CPU, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {} -PD_REGISTER_KERNEL(add_raw, - CPU, - ALL_LAYOUT, - phi::AddRawKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract_raw, - CPU, - ALL_LAYOUT, - phi::SubtractRawKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(divide_raw, - CPU, - ALL_LAYOUT, - phi::DivideRawKernel, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply_raw, - CPU, - ALL_LAYOUT, - phi::MultiplyRawKernel, - float, - double, - int, - int64_t, - bool, - complex64, - complex128, - phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(maximum_raw, CPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..6055541c805f0adef86b50e2944d821ea952dcbc --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_multiply_grad_kernel.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void MultiplyGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + auto* out = &dout; // out is not necessary + phi::funcs::ElemwiseGradCompute, MulGradDY>( + dev_ctx, x, y, *out, dout, axis, dx, dy, MulGradDX(), MulGradDY()); +} + +} // namespace phi + +PD_REGISTER_KERNEL(multiply_grad, + CPU, + ALL_LAYOUT, + phi::MultiplyGradKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_double_grad, + CPU, + ALL_LAYOUT, + phi::MultiplyDoubleGradKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_triple_grad, + CPU, + ALL_LAYOUT, + phi::MultiplyTripleGradKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc b/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..2424a5330109c06d132c5029f1b7b3508aaa022e --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Multiply +DEFINE_CPU_ELEMENTWISE_OP(Multiply) + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MultiplyRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(multiply_raw, + CPU, + ALL_LAYOUT, + phi::MultiplyRawKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(multiply, + CPU, + ALL_LAYOUT, + phi::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..c785eacb9a8bc75ac39ab645cc38f9cee81b58bb --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_subtract_grad_kernel.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_subtract_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise_grad.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void SubtractGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + // skip out + auto* out = &dout; + ElementwiseSubGrad(dev_ctx, x, y, *out, dout, dx, dy, axis); +} + +template +void SubtractDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& dout, + int axis, + DenseTensor* ddout) { + phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +} // namespace phi + +PD_REGISTER_KERNEL(subtract_grad, + CPU, + ALL_LAYOUT, + phi::SubtractGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(subtract_double_grad, + CPU, + ALL_LAYOUT, + phi::SubtractDoubleGradKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc b/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..0e97852ac33e14b7aa321746e67ee9894f88deea --- /dev/null +++ b/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Subtract +DEFINE_CPU_ELEMENTWISE_OP(Subtract) + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + SubtractRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + +PD_REGISTER_KERNEL(subtract_raw, + CPU, + ALL_LAYOUT, + phi::SubtractRawKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(subtract, + CPU, + ALL_LAYOUT, + phi::SubtractKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc index 77c763171088cfb049f8ceed170a96e2ba4ad00d..3bfc07319e98dac12fcec00a6172ea113f654b29 100644 --- a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc @@ -17,7 +17,7 @@ #include #include #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/compare_functors.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/elementwise_add_grad_kernel.h b/paddle/phi/kernels/elementwise_add_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..9b754cfefe3657e1c0b5e1ad1528d4b42b023cd1 --- /dev/null +++ b/paddle/phi/kernels/elementwise_add_grad_kernel.h @@ -0,0 +1,49 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { + +template +void AddGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void AddDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* ddout); + +template +void AddTripleGradKernel(const Context& dev_ctx, + const DenseTensor& ddx, + const DenseTensor& ddy, + const DenseTensor& d_ddout, + int axis, + DenseTensor* d_ddx, + DenseTensor* d_ddy); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_add_kernel.h b/paddle/phi/kernels/elementwise_add_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..3245c450aaebea72e5d642a39a8e33b95aa8ce9f --- /dev/null +++ b/paddle/phi/kernels/elementwise_add_kernel.h @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { +template +void AddRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Add(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + AddKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_divide_grad_kernel.h b/paddle/phi/kernels/elementwise_divide_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..6d29dae99a1314a750faac8479cfab3bd2b56664 --- /dev/null +++ b/paddle/phi/kernels/elementwise_divide_grad_kernel.h @@ -0,0 +1,44 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { + +template +void DivideGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void DivideDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dx, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* dy, + DenseTensor* dout, + DenseTensor* ddout); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_divide_kernel.h b/paddle/phi/kernels/elementwise_divide_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..5555b69fde1dea84870bb19bd16d9b65fb92786e --- /dev/null +++ b/paddle/phi/kernels/elementwise_divide_kernel.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { + +template +void DivideRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Divide(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + DivideKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_grad_kernel.h b/paddle/phi/kernels/elementwise_grad_kernel.h index 0e730fbfbfa4de7fddc29d648b8a40d5e3e31951..6f2f2915ecf9e7e1bb6096d3cf1a257d588e826a 100644 --- a/paddle/phi/kernels/elementwise_grad_kernel.h +++ b/paddle/phi/kernels/elementwise_grad_kernel.h @@ -19,111 +19,6 @@ limitations under the License. 
*/ namespace phi { -template -void AddGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void AddDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* ddout); - -template -void AddTripleGradKernel(const Context& dev_ctx, - const DenseTensor& ddx, - const DenseTensor& ddy, - const DenseTensor& d_ddout, - int axis, - DenseTensor* d_ddx, - DenseTensor* d_ddy); - -template -void SubtractGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void SubtractDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& dout, - int axis, - DenseTensor* ddout); - -template -void DivideGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void DivideDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dx, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* dy, - DenseTensor* dout, - DenseTensor* ddout); - -template -void MultiplyGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy); - -template -void MultiplyDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* dx, - DenseTensor* dy, - DenseTensor* ddout); - -template -void MultiplyTripleGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& d_dx, - const DenseTensor& d_dy, - paddle::optional d_ddout, - int axis, - DenseTensor* d_x, - DenseTensor* d_y, - DenseTensor* d_dout, - DenseTensor* d_ddx, - DenseTensor* d_ddy); - template void ElementwiseFMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/elementwise_kernel.cc b/paddle/phi/kernels/elementwise_kernel.cc index 6cd602e47b8e6ba2b3085abfad58d3b229f77b7d..4cee24d2f8069d899e2ef84d5802149dc146e68a 100644 --- a/paddle/phi/kernels/elementwise_kernel.cc +++ b/paddle/phi/kernels/elementwise_kernel.cc @@ -19,42 +19,6 @@ namespace phi { -template -void AddKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - AddRawKernel(dev_ctx, x, y, axis, out); -} - -template -void SubtractKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - SubtractRawKernel(dev_ctx, x, y, axis, out); -} - -template -void DivideKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - DivideRawKernel(dev_ctx, x, y, axis, out); -} - -template -void MultiplyKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out) { - int axis = -1; - MultiplyRawKernel(dev_ctx, x, y, axis, out); -} - template void MaximumKernel(const Context& dev_ctx, const DenseTensor& x, @@ 
-105,51 +69,6 @@ void ElementwisePowKernel(const Context& dev_ctx, using complex64 = ::phi::dtype::complex; using complex128 = ::phi::dtype::complex; -PD_REGISTER_KERNEL(add, - CPU, - ALL_LAYOUT, - phi::AddKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract, - CPU, - ALL_LAYOUT, - phi::SubtractKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(divide, - CPU, - ALL_LAYOUT, - phi::DivideKernel, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply, - CPU, - ALL_LAYOUT, - phi::MultiplyKernel, - float, - double, - int, - int64_t, - bool, - complex64, - complex128, - phi::dtype::bfloat16) {} PD_REGISTER_KERNEL(maximum, CPU, ALL_LAYOUT, @@ -183,57 +102,6 @@ PD_REGISTER_KERNEL(elementwise_pow, #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PD_REGISTER_KERNEL(add, - GPU, - ALL_LAYOUT, - phi::AddKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract, - GPU, - ALL_LAYOUT, - phi::SubtractKernel, - float, - double, - int16_t, - int, - int64_t, - phi::dtype::float16, - complex64, - complex128, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(divide, - GPU, - ALL_LAYOUT, - phi::DivideKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply, - GPU, - ALL_LAYOUT, - phi::MultiplyKernel, - float, - double, - int, - int64_t, - bool, - phi::dtype::float16, - phi::dtype::bfloat16, - complex64, - complex128) {} PD_REGISTER_KERNEL(maximum, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/elementwise_kernel.h b/paddle/phi/kernels/elementwise_kernel.h index 09b6b02e372571095de200b856a2a0103ef05182..37fe895d4051fe3d95a0ac46cf3fd45a7c022ed3 100644 --- a/paddle/phi/kernels/elementwise_kernel.h +++ b/paddle/phi/kernels/elementwise_kernel.h @@ -33,58 +33,6 @@ void FMinKernel(const Context& dev_ctx, int axis, DenseTensor* out); -template -void AddRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void AddKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void SubtractRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void SubtractKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void DivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void DivideKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void MultiplyRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void MultiplyKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - template void MaximumRawKernel(const Context& dev_ctx, const DenseTensor& x, @@ -150,50 +98,6 @@ void ElementwisePowKernel(const Context& dev_ctx, const DenseTensor& y, DenseTensor* out); -template -DenseTensor Add(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, 
&meta_out); - AddKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Subtract(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - SubtractKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Divide(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - DivideKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Multiply(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - MultiplyKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - template DenseTensor Maximum(const Context& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/elementwise_multiply_grad_kernel.h b/paddle/phi/kernels/elementwise_multiply_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..517948a50d1b18380ba29334d0b91d4a32225e88 --- /dev/null +++ b/paddle/phi/kernels/elementwise_multiply_grad_kernel.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { + +template +void MultiplyGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void MultiplyDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* dx, + DenseTensor* dy, + DenseTensor* ddout); + +template +void MultiplyTripleGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& d_dx, + const DenseTensor& d_dy, + paddle::optional d_ddout, + int axis, + DenseTensor* d_x, + DenseTensor* d_y, + DenseTensor* d_dout, + DenseTensor* d_ddx, + DenseTensor* d_ddy); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_multiply_kernel.h b/paddle/phi/kernels/elementwise_multiply_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..608ae95d2ba4b8ee4a3f9b38d3387faafc8589ab --- /dev/null +++ b/paddle/phi/kernels/elementwise_multiply_kernel.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { + +template +void MultiplyRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Multiply(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + MultiplyKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_subtract_grad_kernel.h b/paddle/phi/kernels/elementwise_subtract_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..7be91b4b9f4cdfcfac5e448ef7e803436b937504 --- /dev/null +++ b/paddle/phi/kernels/elementwise_subtract_grad_kernel.h @@ -0,0 +1,39 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/utils/optional.h" + +namespace phi { +template +void SubtractGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void SubtractDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& dout, + int axis, + DenseTensor* ddout); + +} // namespace phi diff --git a/paddle/phi/kernels/elementwise_subtract_kernel.h b/paddle/phi/kernels/elementwise_subtract_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..1f6c4383df5d8661a766600a1f969aa6ffb90231 --- /dev/null +++ b/paddle/phi/kernels/elementwise_subtract_kernel.h @@ -0,0 +1,46 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/infermeta/binary.h" + +namespace phi { + +template +void SubtractRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Subtract(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + SubtractKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace phi diff --git a/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..8dd4d0184c267613615af7cbf266201785149e72 --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_add_grad_kernel.cu @@ -0,0 +1,118 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/elementwise_add_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void AddGradFunc(const GPUContext& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + DenseTensor* dx, + DenseTensor* dy, + int axis = -1) { + if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { + ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); + } else { + DefaultElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy, axis); + } +} + +template +void AddGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); +} + +template +void AddDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + const DenseTensor& dout, + paddle::optional ddx, + paddle::optional ddy, + int axis, + DenseTensor* ddout) { + phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +template +void AddTripleGradKernel(const Context& dev_ctx, + const DenseTensor& ddx, + const DenseTensor& ddy, + const DenseTensor& d_ddout, + int axis, + DenseTensor* d_ddx, + DenseTensor* d_ddy) { + phi::AddGradImpl( + dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); +} + +} // namespace phi + +PD_REGISTER_KERNEL(add_grad, + GPU, + ALL_LAYOUT, + phi::AddGradKernel, + float, + double, + int, + int64_t, + 
phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_double_grad, + GPU, + ALL_LAYOUT, + phi::AddDoubleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(add_triple_grad, + GPU, + ALL_LAYOUT, + phi::AddTripleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/elementwise_divide_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_divide_grad_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..57bf6da4060d34acdbf44ac80d4577e24247417d --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_divide_grad_kernel.cu @@ -0,0 +1,86 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/elementwise_divide_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void DivideGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& out, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + const auto place = dev_ctx.GetPlace(); + if (dx != nullptr && dy != nullptr) { + std::vector ins = {&dout, &out, &y}; + GetGradXAndYOut( + dev_ctx, + place, + axis, + ins, + dout, + dx, + dy, + funcs::DivGradXYFunctor()); + } else if (dx != nullptr && dy == nullptr) { + std::vector ins = {&dout, &y}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dx, funcs::DivGradXFunctor()); + } else if (dy != nullptr && dx == nullptr) { + std::vector ins = {&dout, &out, &y}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dy, funcs::DivGradYFunctor()); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(divide_grad, + GPU, + ALL_LAYOUT, + phi::DivideGradKernel, + float, + phi::dtype::float16, + phi::dtype::bfloat16, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(divide_double_grad, + GPU, + ALL_LAYOUT, + phi::DivideDoubleGradKernel, + float, + phi::dtype::float16, + phi::dtype::bfloat16, + double, + int, + int64_t, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu index fae7978d3d2ea0518879224364335eea68b3a831..c814e7b3bb63d659fec5919f7506b2d63bfd3508 100644 --- a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu @@ 
-26,128 +26,6 @@ namespace phi { -template -void AddGradFunc(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - DenseTensor* dx, - DenseTensor* dy, - int axis = -1) { - if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { - ElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy); - } else { - DefaultElementwiseAddGrad(dev_ctx, x, y, out, dout, dx, dy, axis); - } -} - -template -void AddGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - phi::AddGradImpl(dev_ctx, x, y, dout, axis, dx, dy, AddGradFunc); -} - -template -void AddDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - const DenseTensor& dout, - paddle::optional ddx, - paddle::optional ddy, - int axis, - DenseTensor* ddout) { - phi::AddDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void AddTripleGradKernel(const Context& dev_ctx, - const DenseTensor& ddx, - const DenseTensor& ddy, - const DenseTensor& d_ddout, - int axis, - DenseTensor* d_ddx, - DenseTensor* d_ddy) { - phi::AddGradImpl( - dev_ctx, ddx, ddy, d_ddout, axis, d_ddx, d_ddy, AddGradFunc); -} - -template -void SubtractGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - // skip out - auto* out = &dout; - if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { - elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy); - } else { - default_elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy, axis); - } -} - -template -void SubtractDoubleGradKernel(const Context& dev_ctx, - const DenseTensor& y, - paddle::optional ddx, - paddle::optional ddy, - const DenseTensor& dout, - int axis, - DenseTensor* ddout) { - phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); -} - -template -void DivideGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& out, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - const auto place = dev_ctx.GetPlace(); - if (dx != nullptr && dy != nullptr) { - std::vector ins = {&dout, &out, &y}; - GetGradXAndYOut( - dev_ctx, - place, - axis, - ins, - dout, - dx, - dy, - funcs::DivGradXYFunctor()); - } else if (dx != nullptr && dy == nullptr) { - std::vector ins = {&dout, &y}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dx, funcs::DivGradXFunctor()); - } else if (dy != nullptr && dx == nullptr) { - std::vector ins = {&dout, &out, &y}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dy, funcs::DivGradYFunctor()); - } -} - -template -void MultiplyGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - const DenseTensor& dout, - int axis, - DenseTensor* dx, - DenseTensor* dy) { - funcs::ElementwiseGradPreProcess(dout, dx); - ElementwiseMulGrad(dev_ctx, x, y, dout, dx, dy, axis); -} - template void MaximumGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -211,138 +89,6 @@ void MinimumGradKernel(const Context& dev_ctx, } } // namespace phi -PD_REGISTER_KERNEL(add_grad, - GPU, - ALL_LAYOUT, - phi::AddGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_double_grad, - GPU, - ALL_LAYOUT, - phi::AddDoubleGradKernel, - float, - double, - int, - 
int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(add_triple_grad, - GPU, - ALL_LAYOUT, - phi::AddTripleGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_grad, - GPU, - ALL_LAYOUT, - phi::SubtractGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(subtract_double_grad, - GPU, - ALL_LAYOUT, - phi::SubtractDoubleGradKernel, - float, - double, - int, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_grad, - GPU, - ALL_LAYOUT, - phi::DivideGradKernel, - float, - phi::dtype::float16, - phi::dtype::bfloat16, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(divide_double_grad, - GPU, - ALL_LAYOUT, - phi::DivideDoubleGradKernel, - float, - phi::dtype::float16, - phi::dtype::bfloat16, - double, - int, - int64_t, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_grad, - GPU, - ALL_LAYOUT, - phi::MultiplyGradKernel, - float, - phi::dtype::float16, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_double_grad, - GPU, - ALL_LAYOUT, - phi::MultiplyDoubleGradKernel, - float, - phi::dtype::float16, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} - -PD_REGISTER_KERNEL(multiply_triple_grad, - GPU, - ALL_LAYOUT, - phi::MultiplyTripleGradKernel, - float, - phi::dtype::float16, - double, - int, - int64_t, - bool, - phi::dtype::bfloat16, - phi::dtype::complex, - phi::dtype::complex) {} PD_REGISTER_KERNEL(fmax_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/gpu/elementwise_multiply_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_multiply_grad_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..3442d7f02853913a200c413fe1f7ebe0690e2627 --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_multiply_grad_kernel.cu @@ -0,0 +1,82 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_multiply_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void MultiplyGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + ElementwiseMulGrad(dev_ctx, x, y, dout, dx, dy, axis); +} + +} // namespace phi + +PD_REGISTER_KERNEL(multiply_grad, + GPU, + ALL_LAYOUT, + phi::MultiplyGradKernel, + float, + phi::dtype::float16, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_double_grad, + GPU, + ALL_LAYOUT, + phi::MultiplyDoubleGradKernel, + float, + phi::dtype::float16, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(multiply_triple_grad, + GPU, + ALL_LAYOUT, + phi::MultiplyTripleGradKernel, + float, + phi::dtype::float16, + double, + int, + int64_t, + bool, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..20f3b73e4094f651827504b26eb6f1f9cd11af21 --- /dev/null +++ b/paddle/phi/kernels/gpu/elementwise_subtract_grad_kernel.cu @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/elementwise_grad_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/funcs/elementwise_functor.h" +#include "paddle/phi/kernels/gpu/elementwise_grad.h" +#include "paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h" + +namespace phi { + +template +void SubtractGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + // skip out + auto* out = &dout; + if (dx != nullptr && dy != nullptr && (dx->dims() == dy->dims())) { + elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy); + } else { + default_elementwise_sub_grad(dev_ctx, x, y, *out, dout, dx, dy, axis); + } +} + +template +void SubtractDoubleGradKernel(const Context& dev_ctx, + const DenseTensor& y, + paddle::optional ddx, + paddle::optional ddy, + const DenseTensor& dout, + int axis, + DenseTensor* ddout) { + phi::SubtractDoubleGradImpl(dev_ctx, y, ddx, ddy, dout, axis, ddout); +} + +} // namespace phi + +PD_REGISTER_KERNEL(subtract_grad, + GPU, + ALL_LAYOUT, + phi::SubtractGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} + +PD_REGISTER_KERNEL(subtract_double_grad, + GPU, + ALL_LAYOUT, + phi::SubtractDoubleGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + phi::dtype::complex, + phi::dtype::complex) {} diff --git a/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu b/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu index be6cdc7825575857eb4504895a1c1b9ee521895d..84768866cc9e7582b711be274ed79028a89788a2 100644 --- a/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu +++ b/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu @@ -23,7 +23,7 @@ #include "paddle/phi/backends/dynload/cusolver.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/abs_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/compare_functors.h" diff --git a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h index 8c37091ef1b54def8ef81161a4ac0a85fc265967..371644e6434a43bb1a74f7d298a02bc1ebbee760 100644 --- a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h @@ -19,7 +19,7 @@ #include "paddle/phi/kernels/cholesky_solve_kernel.h" #include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/copy_kernel.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h index e4356e9af39372cd330991502078a13520d05586..ab1c33d50a45626a238b25ab6ecbd9cb4944a491 100644 --- a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h +++ 
b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h
@@ -17,7 +17,7 @@
 #include "paddle/phi/kernels/determinant_grad_kernel.h"
 
 #include "paddle/phi/kernels/copy_kernel.h"
-#include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
 #include "paddle/phi/kernels/empty_kernel.h"
 #include "paddle/phi/kernels/full_kernel.h"
 #include "paddle/phi/kernels/funcs/for_range.h"
diff --git a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h
index 5e06435b28e2719c2e9fc18de034073f9674a977..f39786fff2665f43b14667f5d9cbea78712bf3ff 100644
--- a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h
+++ b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h
@@ -16,7 +16,9 @@
 #include "paddle/phi/core/dense_tensor.h"
 
 #include "paddle/phi/kernels/complex_kernel.h"
-#include "paddle/phi/kernels/elementwise_kernel.h"
+#include "paddle/phi/kernels/elementwise_divide_kernel.h"
+#include "paddle/phi/kernels/elementwise_multiply_kernel.h"
+#include "paddle/phi/kernels/elementwise_subtract_kernel.h"
 #include "paddle/phi/kernels/funcs/diag_functor.h"
 #include "paddle/phi/kernels/funcs/eigen/common.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
diff --git a/paddle/phi/kernels/impl/elementwise_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_kernel_impl.h
index d5c2c559b2c0690c7bb6c85261353791448f030f..b126ca9b842275610d2dc238a9cd517089d93a7d 100644
--- a/paddle/phi/kernels/impl/elementwise_kernel_impl.h
+++ b/paddle/phi/kernels/impl/elementwise_kernel_impl.h
@@ -22,6 +22,48 @@
 #endif
 
 namespace phi {
+
+#define DEFINE_CPU_ELEMENTWISE_OP(name)                                      \
+  template <typename T, typename Context>                                    \
+  void name##RawKernel(const Context& dev_ctx,                               \
+                       const DenseTensor& x,                                 \
+                       const DenseTensor& y,                                 \
+                       int axis,                                             \
+                       DenseTensor* out) {                                   \
+    dev_ctx.template Alloc<T>(out);                                          \
+    if (x.dims() == y.dims()) {                                              \
+      SameDimsElementwiseCompute<SameDims##name##Functor<CPUContext, T>>()(  \
+          dev_ctx, x, y, out);                                               \
+    } else {                                                                 \
+      auto x_dims = x.dims();                                                \
+      auto y_dims = y.dims();                                                \
+      if (x_dims.size() >= y_dims.size()) {                                  \
+        funcs::ElementwiseCompute<funcs::name##Functor<T>, T>(               \
+            dev_ctx, x, y, axis, funcs::name##Functor<T>(), out);            \
+      } else {                                                               \
+        funcs::ElementwiseCompute<funcs::Inverse##name##Functor<T>, T>(      \
+            dev_ctx, x, y, axis, funcs::Inverse##name##Functor<T>(), out);   \
+      }                                                                      \
+    }                                                                        \
+  }
+
+#define DEFINE_CUDA_ELEMENTWISE_OP(name)                                     \
+  template <typename T, typename Context>                                    \
+  void name##RawKernel(const Context& dev_ctx,                               \
+                       const DenseTensor& x,                                 \
+                       const DenseTensor& y,                                 \
+                       int axis,                                             \
+                       DenseTensor* out) {                                   \
+    std::vector<const DenseTensor*> inputs;                                  \
+    std::vector<DenseTensor*> outputs;                                       \
+    inputs.emplace_back(&x);                                                 \
+    inputs.emplace_back(&y);                                                 \
+    outputs.emplace_back(out);                                               \
+    dev_ctx.template Alloc<T>(out);                                          \
+    funcs::BroadcastKernel<ElementwiseType::kBinary, T, T>(                  \
+        dev_ctx, inputs, &outputs, axis, funcs::name##Functor<T>());         \
+  }
+
 template <typename T, typename Context>
 void FMaxKernel(const Context& dev_ctx,
                 const DenseTensor& x,
diff --git a/paddle/phi/kernels/kps/elementwise_add_kernel.cu b/paddle/phi/kernels/kps/elementwise_add_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b5532c614314f0064c508555171a7099e6fcdd5f
--- /dev/null
+++ b/paddle/phi/kernels/kps/elementwise_add_kernel.cu
@@ -0,0 +1,73 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#ifndef PADDLE_WITH_XPU_KP
+#include "paddle/phi/common/complex.h"
+#include "paddle/phi/common/float16.h"
+#endif
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h"
+
+namespace phi {
+
+DEFINE_CUDA_ELEMENTWISE_OP(Add)
+
+template <typename T, typename Context>
+void AddKernel(const Context& dev_ctx,
+               const DenseTensor& x,
+               const DenseTensor& y,
+               DenseTensor* out) {
+  int axis = -1;
+  AddRawKernel<T>(dev_ctx, x, y, axis, out);
+}
+
+}  // namespace phi
+
+#ifdef PADDLE_WITH_XPU_KP
+PD_REGISTER_KERNEL(add_raw, KPS, ALL_LAYOUT, phi::AddRawKernel, float) {}
+#else
+
+using float16 = phi::dtype::float16;
+using bfloat16 = phi::dtype::bfloat16;
+using complex64 = ::phi::dtype::complex<float>;
+using complex128 = ::phi::dtype::complex<double>;
+
+PD_REGISTER_KERNEL(add_raw,
+                   KPS,
+                   ALL_LAYOUT,
+                   phi::AddRawKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   float16,
+                   bfloat16,
+                   complex64,
+                   complex128) {}
+PD_REGISTER_KERNEL(add,
+                   KPS,
+                   ALL_LAYOUT,
+                   phi::AddKernel,
+                   float,
+                   double,
+                   int16_t,
+                   int,
+                   int64_t,
+                   phi::dtype::float16,
+                   phi::dtype::bfloat16,
+                   complex64,
+                   complex128) {}
+#endif
diff --git a/paddle/phi/kernels/kps/elementwise_divide_kernel.cu b/paddle/phi/kernels/kps/elementwise_divide_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..852babe29dbf7a23b5386f40aec97aa5810a659d
--- /dev/null
+++ b/paddle/phi/kernels/kps/elementwise_divide_kernel.cu
@@ -0,0 +1,73 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "paddle/phi/backends/gpu/gpu_context.h" +#ifndef PADDLE_WITH_XPU_KP +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#endif +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Divide +DEFINE_CUDA_ELEMENTWISE_OP(Divide) + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + DivideRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL(divide_raw, KPS, ALL_LAYOUT, phi::DivideRawKernel, float) {} +#else + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(divide_raw, + KPS, + ALL_LAYOUT, + phi::DivideRawKernel, + float, + double, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} + +PD_REGISTER_KERNEL(divide, + KPS, + ALL_LAYOUT, + phi::DivideKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16, + complex64, + complex128) {} +#endif diff --git a/paddle/phi/kernels/kps/elementwise_kernel.cu b/paddle/phi/kernels/kps/elementwise_kernel.cu index 01a34c0f85eda0302fa581d7c65137c1923677b9..5ccd3b1a482109bfa04a84844b23d95b9e38186e 100644 --- a/paddle/phi/kernels/kps/elementwise_kernel.cu +++ b/paddle/phi/kernels/kps/elementwise_kernel.cu @@ -22,34 +22,6 @@ namespace phi { -#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ - template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - std::vector inputs; \ - std::vector outputs; \ - inputs.emplace_back(&x); \ - inputs.emplace_back(&y); \ - outputs.emplace_back(out); \ - dev_ctx.template Alloc(out); \ - funcs::BroadcastKernel( \ - dev_ctx, inputs, &outputs, axis, funcs::name##Functor()); \ - } - -/** - * Kernels - */ -// Create the definition of Add -DEFINE_CUDA_ELEMENTWISE_OP(Add) -// Create the definition of Subtract -DEFINE_CUDA_ELEMENTWISE_OP(Subtract) -// Create the definition of Multiply -DEFINE_CUDA_ELEMENTWISE_OP(Multiply) -// Create the definition of Divide -DEFINE_CUDA_ELEMENTWISE_OP(Divide) // Create the definition of Maximum DEFINE_CUDA_ELEMENTWISE_OP(Maximum) // Create the definition of Minimum @@ -64,12 +36,6 @@ DEFINE_CUDA_ELEMENTWISE_OP(ElementwisePow) } // namespace phi #ifdef PADDLE_WITH_XPU_KP -PD_REGISTER_KERNEL(add_raw, KPS, ALL_LAYOUT, phi::AddRawKernel, float) {} -PD_REGISTER_KERNEL( - subtract_raw, KPS, ALL_LAYOUT, phi::SubtractRawKernel, float) {} -PD_REGISTER_KERNEL(divide_raw, KPS, ALL_LAYOUT, phi::DivideRawKernel, float) {} -PD_REGISTER_KERNEL( - multiply_raw, KPS, ALL_LAYOUT, phi::MultiplyRawKernel, float) {} PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, phi::MaximumRawKernel, float) { } PD_REGISTER_KERNEL(minimum_raw, KPS, ALL_LAYOUT, phi::MinimumRawKernel, float) { @@ -89,57 +55,6 @@ PD_REGISTER_KERNEL( PD_REGISTER_KERNEL( fmin, KPS, ALL_LAYOUT, phi::FMinKernel, float, double, int, int64_t) {} -PD_REGISTER_KERNEL(add_raw, - KPS, - ALL_LAYOUT, - phi::AddRawKernel, - float, - double, - int16_t, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract_raw, - KPS, - ALL_LAYOUT, - phi::SubtractRawKernel, - float, - double, - int16_t, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} 
-PD_REGISTER_KERNEL(divide_raw, - KPS, - ALL_LAYOUT, - phi::DivideRawKernel, - float, - double, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply_raw, - KPS, - ALL_LAYOUT, - phi::MultiplyRawKernel, - float, - double, - int, - int64_t, - bool, - float16, - complex64, - complex128, - bfloat16) {} PD_REGISTER_KERNEL(maximum_raw, KPS, ALL_LAYOUT, diff --git a/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu b/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..8bede0198c2fa100b90835e66968a147485558b3 --- /dev/null +++ b/paddle/phi/kernels/kps/elementwise_multiply_kernel.cu @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/backends/gpu/gpu_context.h" +#ifndef PADDLE_WITH_XPU_KP +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#endif +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Multiply +DEFINE_CUDA_ELEMENTWISE_OP(Multiply) + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MultiplyRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL( + multiply_raw, KPS, ALL_LAYOUT, phi::MultiplyRawKernel, float) {} +#else + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(multiply_raw, + KPS, + ALL_LAYOUT, + phi::MultiplyRawKernel, + float, + double, + int, + int64_t, + bool, + float16, + complex64, + complex128, + bfloat16) {} +PD_REGISTER_KERNEL(multiply, + KPS, + ALL_LAYOUT, + phi::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + phi::dtype::float16, + phi::dtype::bfloat16, + complex64, + complex128) {} +#endif diff --git a/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu b/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..757dedb99c9310f3fe1600bd7c204bd5dcda3f5e --- /dev/null +++ b/paddle/phi/kernels/kps/elementwise_subtract_kernel.cu @@ -0,0 +1,75 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/backends/gpu/gpu_context.h" +#ifndef PADDLE_WITH_XPU_KP +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" +#endif +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" + +namespace phi { + +// Create the definition of Subtract +DEFINE_CUDA_ELEMENTWISE_OP(Subtract) + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + SubtractRawKernel(dev_ctx, x, y, axis, out); +} + +} // namespace phi + +#ifdef PADDLE_WITH_XPU_KP +PD_REGISTER_KERNEL( + subtract_raw, KPS, ALL_LAYOUT, phi::SubtractRawKernel, float) {} +#else + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +PD_REGISTER_KERNEL(subtract_raw, + KPS, + ALL_LAYOUT, + phi::SubtractRawKernel, + float, + double, + int16_t, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} +PD_REGISTER_KERNEL(subtract, + KPS, + ALL_LAYOUT, + phi::SubtractKernel, + float, + double, + int16_t, + int, + int64_t, + phi::dtype::float16, + complex64, + complex128, + phi::dtype::bfloat16) {} +#endif diff --git a/paddle/phi/kernels/selected_rows/elementwise_kernel.cc b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc similarity index 96% rename from paddle/phi/kernels/selected_rows/elementwise_kernel.cc rename to paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc index 7fba3244a60eedc032290aa82f99e9eb01e0ff6b..9fe8eef7ec82a3ea51206fda5bb8ddff4718eb98 100644 --- a/paddle/phi/kernels/selected_rows/elementwise_kernel.cc +++ b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/kernels/selected_rows/elementwise_kernel.h" +#include "paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/phi/common/float16.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" namespace phi { namespace sr { diff --git a/paddle/phi/kernels/selected_rows/elementwise_kernel.h b/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h similarity index 100% rename from paddle/phi/kernels/selected_rows/elementwise_kernel.h rename to paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.h diff --git a/paddle/phi/tests/api/test_elementwise_api.cc b/paddle/phi/tests/api/test_elementwise_api.cc index d4013a788c76cb4f049574ee893320088fe7ac2c..fb4c68a87cb25be3c3c154cbb5b334fb0e3702c8 100644 --- a/paddle/phi/tests/api/test_elementwise_api.cc +++ b/paddle/phi/tests/api/test_elementwise_api.cc @@ -22,6 +22,9 @@ limitations under the License. 
*/ #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(subtract, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(multiply, CPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(divide, CPU, ALL_LAYOUT); namespace paddle { namespace tests { diff --git a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc index 9552c02976f30d11601967034815545f94ff1f97..36b200d4d44940b2b7f73e75ea7339517dd5f5e3 100644 --- a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc @@ -16,7 +16,10 @@ limitations under the License. */ #include #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/kernels/elementwise_kernel.h" +#include "paddle/phi/kernels/elementwise_add_kernel.h" +#include "paddle/phi/kernels/elementwise_divide_kernel.h" +#include "paddle/phi/kernels/elementwise_multiply_kernel.h" +#include "paddle/phi/kernels/elementwise_subtract_kernel.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/phi/api/lib/utils/allocator.h"