diff --git a/paddle/fluid/operators/determinant_op.h b/paddle/fluid/operators/determinant_op.h index e6de0ee3548b7442ac5e059331502cac441020e5..a1fe8a25665ec84b38a535f541a2cbe33d0a7fcf 100644 --- a/paddle/fluid/operators/determinant_op.h +++ b/paddle/fluid/operators/determinant_op.h @@ -22,6 +22,7 @@ #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/complex_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/common_shape.h" #include "paddle/phi/kernels/funcs/diag_functor.h" @@ -30,7 +31,6 @@ #include "paddle/phi/kernels/funcs/unsqueeze.h" #include "paddle/phi/kernels/impl/determinant_grad_kernel_impl.h" #include "paddle/phi/kernels/impl/determinant_kernel_impl.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/matmul_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h" diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index 5e4c83e1a45ebdb96a0e764cfa2d3997442ae1ea..6daf05a9d778dfb194225f59321ffc3eb40235db 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -21,13 +21,13 @@ #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/complex_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/phi/kernels/funcs/diag_functor.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/slice.h" #include "paddle/phi/kernels/funcs/unsqueeze.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/matmul_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index a995877778e4770ea8ae64c051a71b31c1fb1e29..c28abb916b7a7d59d5a1974bed63e43b2f32ef2c 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -27,7 +27,7 @@ limitations under the License. */ // only can include the headers in paddle/phi/include dirs #include "paddle/phi/kernels/elementwise_grad_kernel.h" -#include "paddle/phi/kernels/math_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index 58a3123c7e332f50b0830577436528f1e8df1cdf..6f4aba93d56e2a8227a8578067ac934d41243fb6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -18,7 +18,7 @@ limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/platform/cpu_info.h" -#include "paddle/phi/kernels/math_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lu_op.h b/paddle/fluid/operators/lu_op.h index 6e2ac4617da4df8e4ebaf92d4193ef8b3368b97a..2414ae68438fd4e3cff94d60f400063b72116714 100644 --- a/paddle/fluid/operators/lu_op.h +++ b/paddle/fluid/operators/lu_op.h @@ -18,9 +18,9 @@ limitations under the License. */ #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/operators/set_value_op.h" #include "paddle/fluid/operators/svd_helper.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" #include "paddle/phi/kernels/funcs/tril_triu_compute.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/triangular_solve_kernel.h" namespace paddle { diff --git a/paddle/phi/kernels/CMakeLists.txt b/paddle/phi/kernels/CMakeLists.txt index d16f5f725df50dad83ca53cb957a0bff3fcd5120..02b5b2d74ad2914f60a1df08e500b06733b95aaa 100644 --- a/paddle/phi/kernels/CMakeLists.txt +++ b/paddle/phi/kernels/CMakeLists.txt @@ -27,7 +27,7 @@ kernel_library(full_kernel DEPS ${COMMON_KERNEL_DEPS} empty_kernel) # Some kernels depend on some targets that are not commonly used. # These targets are not suitable for common dependencies. # In this case, you need to manually generate them here. -set(MANUAL_BUILD_KERNELS eigh_kernel gumbel_softmax_kernel gumbel_softmax_grad_kernel math_kernel +set(MANUAL_BUILD_KERNELS eigh_kernel gumbel_softmax_kernel gumbel_softmax_grad_kernel matrix_power_kernel matrix_power_grad_kernel maxout_kernel maxout_grad_kernel pool_kernel put_along_axis_kernel put_along_axis_grad_kernel segment_pool_kernel segment_pool_grad_kernel softmax_kernel softmax_grad_kernel take_along_axis_kernel take_along_axis_grad_kernel @@ -35,7 +35,6 @@ set(MANUAL_BUILD_KERNELS eigh_kernel gumbel_softmax_kernel gumbel_softmax_grad_k kernel_library(eigh_kernel DEPS ${COMMON_KERNEL_DEPS} lapack_function) kernel_library(gumbel_softmax_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) kernel_library(gumbel_softmax_grad_kernel DEPS ${COMMON_KERNEL_DEPS} softmax) -kernel_library(math_kernel DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel) kernel_library(matrix_power_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(matrix_power_grad_kernel DEPS ${COMMON_KERNEL_DEPS} matrix_inverse) kernel_library(maxout_kernel DEPS ${COMMON_KERNEL_DEPS} maxouting) diff --git a/paddle/phi/kernels/cpu/elementwise_kernel.cc b/paddle/phi/kernels/cpu/elementwise_kernel.cc index 37ad18df56ec30c838dd5bd03c484d7889e976c0..095d11720ce26622c31e517286d6f656869e62ff 100644 --- a/paddle/phi/kernels/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_kernel.cc @@ -12,10 +12,81 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/phi/kernels/cpu/elementwise.h" +#include "paddle/phi/api/ext/dispatch.h" #include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/common/bfloat16.h" +#include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" +namespace phi { + +#define DEFINE_CPU_ELEMENTWISE_OP(name) \ + template \ + void name##RawKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + dev_ctx.template Alloc(out); \ + if (x.dims() == y.dims()) { \ + SameDimsElementwiseCompute>()( \ + dev_ctx, x, y, out); \ + } else { \ + auto x_dims = x.dims(); \ + auto y_dims = y.dims(); \ + if (x_dims.size() >= y_dims.size()) { \ + funcs::ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, funcs::name##Functor(), out); \ + } else { \ + funcs::ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, funcs::Inverse##name##Functor(), out); \ + } \ + } \ + } + +template +void DivideRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + dev_ctx.template Alloc(out); + if (x.dims() == y.dims() && std::is_floating_point::value) { + SameDimsElementwiseCompute>()( + dev_ctx, x, y, out); + } else { + auto x_dims = x.dims(); + auto y_dims = y.dims(); + if (x_dims.size() >= y_dims.size()) { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::DivideFunctor(), out); + } else { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::InverseDivideFunctor(), out); + } + } +} + +// Create the definition of Add +DEFINE_CPU_ELEMENTWISE_OP(Add) + +// Create the definition of Subtract +DEFINE_CPU_ELEMENTWISE_OP(Subtract) + +// Create the definition of Multiply +DEFINE_CPU_ELEMENTWISE_OP(Multiply) + +} // namespace phi + +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::phi::dtype::bfloat16; + PD_REGISTER_KERNEL(elementwise_fmax, CPU, ALL_LAYOUT, @@ -33,3 +104,49 @@ PD_REGISTER_KERNEL(elementwise_fmin, double, int, int64_t) {} + +PD_REGISTER_KERNEL(add_raw, + CPU, + ALL_LAYOUT, + phi::AddRawKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128) {} +PD_REGISTER_KERNEL(subtract_raw, + CPU, + ALL_LAYOUT, + phi::SubtractRawKernel, + float, + double, + int16_t, + int, + int64_t, + complex64, + complex128, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(divide_raw, + CPU, + ALL_LAYOUT, + phi::DivideRawKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PD_REGISTER_KERNEL(multiply_raw, + CPU, + ALL_LAYOUT, + phi::MultiplyRawKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/cpu/math_kernel.cc b/paddle/phi/kernels/cpu/math_kernel.cc deleted file mode 100644 index 0047940fd1704be2862a4a0a4bf46f4886221464..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/cpu/math_kernel.cc +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/kernels/math_kernel.h" - -#include "paddle/phi/api/ext/dispatch.h" -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/common/scalar.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/cpu/elementwise.h" -#include "paddle/phi/kernels/funcs/elementwise_base.h" -#include "paddle/phi/kernels/funcs/elementwise_functor.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/framework/eigen.h" -#include "paddle/phi/common/bfloat16.h" -#include "paddle/phi/common/complex.h" - -namespace phi { - -#define DEFINE_CPU_ELEMENTWISE_OP(name) \ - template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - dev_ctx.template Alloc(out); \ - if (x.dims() == y.dims()) { \ - SameDimsElementwiseCompute>()( \ - dev_ctx, x, y, out); \ - } else { \ - auto x_dims = x.dims(); \ - auto y_dims = y.dims(); \ - if (x_dims.size() >= y_dims.size()) { \ - funcs::ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, funcs::name##Functor(), out); \ - } else { \ - funcs::ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, funcs::Inverse##name##Functor(), out); \ - } \ - } \ - } - -template -void DivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - dev_ctx.template Alloc(out); - if (x.dims() == y.dims() && std::is_floating_point::value) { - SameDimsElementwiseCompute>()( - dev_ctx, x, y, out); - } else { - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::DivideFunctor(), out); - } else { - funcs::ElementwiseCompute, T>( - dev_ctx, x, y, axis, funcs::InverseDivideFunctor(), out); - } - } -} - -// Create the definition of Add -DEFINE_CPU_ELEMENTWISE_OP(Add) - -// Create the definition of Subtract -DEFINE_CPU_ELEMENTWISE_OP(Subtract) - -// Create the definition of Multiply -DEFINE_CPU_ELEMENTWISE_OP(Multiply) - -} // namespace phi - -using complex64 = ::phi::dtype::complex; -using complex128 = ::phi::dtype::complex; - -// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 -// using bfloat16 = ::phi::dtype::bfloat16; -PD_REGISTER_KERNEL(add_raw, - CPU, - ALL_LAYOUT, - phi::AddRawKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract_raw, - CPU, - ALL_LAYOUT, - phi::SubtractRawKernel, - float, - double, - int16_t, - int, - int64_t, - complex64, - complex128, - phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(divide_raw, - CPU, - ALL_LAYOUT, - phi::DivideRawKernel, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply_raw, - CPU, - ALL_LAYOUT, - phi::MultiplyRawKernel, - float, - double, - int, - int64_t, - bool, - complex64, - complex128, - phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc index 636018ffa68003bc85af22e580bc4ae0768fb1b7..ae1e406d16eec44168b2b7232586293bf90e4bd8 100644 --- a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc @@ -17,12 +17,12 @@ #include #include #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/compare_functors.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" #include "paddle/phi/kernels/impl/matrix_rank_kernel_impl.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/reduce_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/math_kernel.cc b/paddle/phi/kernels/elementwise_kernel.cc similarity index 98% rename from paddle/phi/kernels/math_kernel.cc rename to paddle/phi/kernels/elementwise_kernel.cc index 5aad2375ebb85a52684946fe35b2a5b17a0b9efd..9d10a48c9e0795d8914c0c6cfb49b7686575cfac 100644 --- a/paddle/phi/kernels/math_kernel.cc +++ b/paddle/phi/kernels/elementwise_kernel.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/math_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/backends/all_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/elementwise_kernel.h b/paddle/phi/kernels/elementwise_kernel.h index c1e73ad91c67d415437829d5fc731ac91a5722f5..b064ecc454c592df49670205163e73d2d3b249b3 100644 --- a/paddle/phi/kernels/elementwise_kernel.h +++ b/paddle/phi/kernels/elementwise_kernel.h @@ -15,7 +15,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/device_context.h" +#include "paddle/phi/infermeta/binary.h" namespace phi { @@ -33,4 +33,100 @@ void ElementwiseFMinKernel(const Context& dev_ctx, int axis, DenseTensor* out); +template +void AddRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +void SubtractRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +void DivideRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +void MultiplyRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +DenseTensor Add(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + AddKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +template +DenseTensor Subtract(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + SubtractKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +template +DenseTensor Divide(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + DivideKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +template +DenseTensor Multiply(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + MultiplyKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + } // namespace phi diff --git a/paddle/phi/kernels/gpu/elementwise_kernel.cu b/paddle/phi/kernels/gpu/elementwise_kernel.cu index 2cffc68fa0648937b96095f5bd58210adaf865b3..a57d89013f921e3adb5587c70b7bbb12c383de61 100644 --- a/paddle/phi/kernels/gpu/elementwise_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_kernel.cu @@ -13,9 +13,50 @@ // limitations under the License. #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/complex.h" +#include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" +namespace phi { + +#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ + template \ + void name##RawKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + std::vector inputs; \ + std::vector outputs; \ + inputs.emplace_back(&x); \ + inputs.emplace_back(&y); \ + outputs.emplace_back(out); \ + dev_ctx.template Alloc(out); \ + funcs::BroadcastKernel( \ + dev_ctx, inputs, &outputs, axis, funcs::name##Functor()); \ + } + +/** + * Kernels + */ + +// Create the definition of Add +DEFINE_CUDA_ELEMENTWISE_OP(Add) +// Create the definition of Subtract +DEFINE_CUDA_ELEMENTWISE_OP(Subtract) +// Create the definition of Multiply +DEFINE_CUDA_ELEMENTWISE_OP(Multiply) +// Create the definition of Divide +DEFINE_CUDA_ELEMENTWISE_OP(Divide) + +} // namespace phi + +using float16 = phi::dtype::float16; +using bfloat16 = phi::dtype::bfloat16; +using complex64 = ::phi::dtype::complex; +using complex128 = ::phi::dtype::complex; + PD_REGISTER_KERNEL(elementwise_fmax, GPU, ALL_LAYOUT, @@ -33,3 +74,55 @@ PD_REGISTER_KERNEL(elementwise_fmin, double, int, int64_t) {} + +PD_REGISTER_KERNEL(add_raw, + GPU, + ALL_LAYOUT, + phi::AddRawKernel, + float, + double, + int16_t, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} +PD_REGISTER_KERNEL(subtract_raw, + GPU, + ALL_LAYOUT, + phi::SubtractRawKernel, + float, + double, + int16_t, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} +PD_REGISTER_KERNEL(divide_raw, + GPU, + ALL_LAYOUT, + phi::DivideRawKernel, + float, + double, + int, + int64_t, + float16, + bfloat16, + complex64, + complex128) {} +PD_REGISTER_KERNEL(multiply_raw, + GPU, + ALL_LAYOUT, + phi::MultiplyRawKernel, + float, + double, + int, + int64_t, + bool, + float16, + complex64, + complex128, + bfloat16) {} diff --git a/paddle/phi/kernels/gpu/math_kernel.cu b/paddle/phi/kernels/gpu/math_kernel.cu deleted file mode 100644 index d33f216468220da7ef9fc09533226e8fdd0c702f..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/gpu/math_kernel.cu +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/phi/kernels/math_kernel.h" - -#include "paddle/phi/backends/gpu/gpu_context.h" -#include "paddle/phi/kernels/funcs/broadcast_function.h" -#include "paddle/phi/kernels/funcs/elementwise_functor.h" -#include "paddle/phi/kernels/gpu/reduce.h" - -#ifdef __NVCC__ -#include "cub/cub.cuh" -#endif -#ifdef __HIPCC__ -#include -namespace cub = hipcub; -#endif - -#include "paddle/phi/common/complex.h" -#include "paddle/phi/common/float16.h" -#include "paddle/phi/core/compat/convert_utils.h" -#include "paddle/phi/core/enforce.h" -#include "paddle/phi/core/kernel_registry.h" - -namespace phi { - -#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ - template \ - void name##RawKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - std::vector inputs; \ - std::vector outputs; \ - inputs.emplace_back(&x); \ - inputs.emplace_back(&y); \ - outputs.emplace_back(out); \ - dev_ctx.template Alloc(out); \ - funcs::BroadcastKernel( \ - dev_ctx, inputs, &outputs, axis, funcs::name##Functor()); \ - } - -/** - * Kernels - */ - -// Create the definition of Add -DEFINE_CUDA_ELEMENTWISE_OP(Add) -// Create the definition of Subtract -DEFINE_CUDA_ELEMENTWISE_OP(Subtract) -// Create the definition of Multiply -DEFINE_CUDA_ELEMENTWISE_OP(Multiply) -// Create the definition of Divide -DEFINE_CUDA_ELEMENTWISE_OP(Divide) - -} // namespace phi - -using float16 = phi::dtype::float16; -using bfloat16 = phi::dtype::bfloat16; -using complex64 = ::phi::dtype::complex; -using complex128 = ::phi::dtype::complex; - -PD_REGISTER_KERNEL(add_raw, - GPU, - ALL_LAYOUT, - phi::AddRawKernel, - float, - double, - int16_t, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(subtract_raw, - GPU, - ALL_LAYOUT, - phi::SubtractRawKernel, - float, - double, - int16_t, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(divide_raw, - GPU, - ALL_LAYOUT, - phi::DivideRawKernel, - float, - double, - int, - int64_t, - float16, - bfloat16, - complex64, - complex128) {} -PD_REGISTER_KERNEL(multiply_raw, - GPU, - ALL_LAYOUT, - phi::MultiplyRawKernel, - float, - double, - int, - int64_t, - bool, - float16, - complex64, - complex128, - bfloat16) {} diff --git a/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu b/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu index 7796132ec07f433d8495d1dba197c06d536e1338..66ba30f7ce6945693a974733c77a47f0d328e50b 100644 --- a/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu +++ b/paddle/phi/kernels/gpu/matrix_rank_tol_kernel.cu @@ -23,11 +23,11 @@ #include "paddle/phi/backends/dynload/cusolver.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/abs_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/compare_functors.h" #include "paddle/phi/kernels/impl/matrix_rank_kernel_impl.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/reduce_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h index 72741e6d3a01ae374c43a24ac519ff5106b5733e..e3ea10705d24e90a76246d439c6d9263e072bc39 100644 --- a/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h @@ -19,6 +19,7 @@ #include "paddle/phi/kernels/cholesky_solve_kernel.h" #include "paddle/phi/kernels/complex_kernel.h" #include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/expand_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -27,7 +28,6 @@ #include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/funcs/matrix_reduce.h" #include "paddle/phi/kernels/funcs/tril_triu_compute.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h index 038ef0c214bc73b41fc3aff661e296207d615df1..e4356e9af39372cd330991502078a13520d05586 100644 --- a/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/determinant_grad_kernel_impl.h @@ -17,13 +17,13 @@ #include "paddle/phi/kernels/determinant_grad_kernel.h" #include "paddle/phi/kernels/copy_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/for_range.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/matrix_inverse.h" #include "paddle/phi/kernels/funcs/unsqueeze.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h index 5b71fd7fa3a5ecd1c864c155df2586d293d3d2e6..5e06435b28e2719c2e9fc18de034073f9674a977 100644 --- a/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/eigh_grad_kernel_impl.h @@ -16,11 +16,11 @@ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/complex_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/phi/kernels/funcs/diag_functor.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/funcs/unsqueeze.h" -#include "paddle/phi/kernels/math_kernel.h" #include "paddle/phi/kernels/matmul_kernel.h" #include "paddle/phi/kernels/transpose_kernel.h" diff --git a/paddle/phi/kernels/math_kernel.h b/paddle/phi/kernels/math_kernel.h deleted file mode 100644 index ddc3a46e989f5cc86e294eb16ca0f82fcd7d8115..0000000000000000000000000000000000000000 --- a/paddle/phi/kernels/math_kernel.h +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/infermeta/binary.h" -namespace phi { - -template -void AddRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void AddKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void SubtractRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void SubtractKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void DivideRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void DivideKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -void MultiplyRawKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void MultiplyKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - DenseTensor* out); - -template -DenseTensor Add(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - AddKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Subtract(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - SubtractKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Divide(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - DivideKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -template -DenseTensor Multiply(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& y) { - DenseTensor dense_out; - MetaTensor meta_out(&dense_out); - ElementwiseInferMeta(x, y, &meta_out); - MultiplyKernel(dev_ctx, x, y, &dense_out); - return dense_out; -} - -} // namespace phi diff --git a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc index 3e5f96507415624750eb297953719f397e294230..9552c02976f30d11601967034815545f94ff1f97 100644 --- a/paddle/phi/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/phi/tests/kernels/test_elementwise_dev_api.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/kernels/math_kernel.h" +#include "paddle/phi/kernels/elementwise_kernel.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/phi/api/lib/utils/allocator.h"