diff --git a/paddle/fluid/operators/cholesky_solve_op.h b/paddle/fluid/operators/cholesky_solve_op.h index f3b0056165426c7cc46409abece3589edad96269..94b68bff8f446200de563897eb9e9d10b433987a 100644 --- a/paddle/fluid/operators/cholesky_solve_op.h +++ b/paddle/fluid/operators/cholesky_solve_op.h @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/platform/complex.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { // namespace operators @@ -205,7 +205,7 @@ class CholeskySolveGradKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(commonterm); auto pt_y = paddle::experimental::MakePtenDenseTensor(commonterm_conj); auto pt_z = paddle::experimental::MakePtenDenseTensor(commonterm); - pten::Add(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get()); + pten::AddKernel(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get()); auto mat_dim_u = math::CreateMatrixDescriptor(u_bst.dims(), 0, false); auto mat_dim_c = diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index a4567beeb4f3d9a1e913e0853bfaee19a1e9124f..d6d79d166d00a3610af7d22aa845a753437626ad 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -25,7 +25,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -68,7 +68,7 @@ class ElementwiseAddKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z); - pten::Add(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::AddKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); } }; diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h index f3ba5050c4f53695baa92c07f7bb93eb59871de5..c886644bbdd1b63139f4cd98890b84fd63bce330 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h @@ -28,7 +28,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -62,7 +62,7 @@ class ElementwiseDivKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z); - pten::Divide(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::DivideKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); } }; diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu index e131bc4974661c613928ac8fee807841bb85ba58..12e0062a698be6d386893c5c1fb09719ceb1c609 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu @@ -57,7 +57,8 @@ class ElementwiseMulKernel auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y_lod); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod); - pten::Multiply(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::MultiplyKernel(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); } else { PADDLE_THROW(platform::errors::InvalidArgument( "X's type[%s] is not supported by elementwise_op. X's type should be " diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index 8b43f82e6b6a1092565331af2bc050e23a3f8ca8..3b0f0725722101ae3248e651c8279d3de464a728 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -24,7 +24,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -129,7 +129,8 @@ class ElementwiseMulKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod); - pten::Multiply(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::MultiplyKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); } else { PADDLE_THROW(platform::errors::InvalidArgument( "X's type[%s] is not supported by elementwise_op. X's type should be " diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.h b/paddle/fluid/operators/elementwise/elementwise_sub_op.h index 09a33788d4133f6466e03b4b4e619f94401a5345..6a51d7c2a45ad5d31fe58f1b1c60a2f2541c3ce1 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.h @@ -22,7 +22,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -56,7 +56,8 @@ class ElementwiseSubKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z); - pten::Subtract(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::SubtractKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); } }; diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt index 97be4c19c970b4e08416108096e4f9a093f08796..05b321c50c1c4e07be3fd16bdb9ca860448384b0 100644 --- a/paddle/pten/CMakeLists.txt +++ b/paddle/pten/CMakeLists.txt @@ -28,9 +28,5 @@ get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS) # keep this message for debug, remove it later if needless message(STATUS "All standard pten kernels: ${pten_kernels}") set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels}) -set(PTEN_DEPS ${PTEN_DEPS} math_cpu) -if(WITH_GPU OR WITH_ROCM) - set(PTEN_DEPS ${PTEN_DEPS} math_gpu) -endif() cc_library(pten SRCS all.cc DEPS ${PTEN_DEPS}) diff --git a/paddle/pten/api/lib/kernel_declare.h b/paddle/pten/api/lib/kernel_declare.h index 484063df478aaf4d8245e157fce640967bb6d5d6..4d3143ef09ccc62be069f2971ad0a17666547105 100644 --- a/paddle/pten/api/lib/kernel_declare.h +++ b/paddle/pten/api/lib/kernel_declare.h @@ -19,9 +19,3 @@ limitations under the License. */ // TODO(chenweihang) After the kernel is split into a single file, // the kernel declare statement is automatically generated according to the // file name of the kernel, and this header file will be removed - -PT_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PT_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT); -#endif diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h index 876834cea7806daf04e381551e8a4d24a4b74bd6..9abfa297a9452026918667f5f4fb9f00aec962d3 100644 --- a/paddle/pten/include/math.h +++ b/paddle/pten/include/math.h @@ -18,8 +18,7 @@ limitations under the License. */ #include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/complex_kernel.h" -#include "paddle/pten/kernels/cpu/math.h" -#include "paddle/pten/kernels/gpu/math.h" +#include "paddle/pten/kernels/math_kernel.h" #include "paddle/pten/kernels/scale_kernel.h" namespace pten { @@ -46,7 +45,7 @@ DenseTensor Mean(const ContextT& dev_ctx, dev_ctx.GetPlace()), std::move(out_meta)); bool reduce_all = false; - Mean(dev_ctx, x, axis, keep_dim, reduce_all, &dense_out); + Mean(dev_ctx, x, axis, keep_dim, reduce_all, &dense_out); return dense_out; } @@ -66,7 +65,8 @@ DenseTensor Sum(const ContextT& dev_ctx, // so use default value(false) is OK. bool reduce_all = false; - Sum(dev_ctx, x, axis, keep_dim, reduce_all, out_meta.dtype, &dense_out); + Sum( + dev_ctx, x, axis, keep_dim, reduce_all, out_meta.dtype, &dense_out); return dense_out; } @@ -85,62 +85,6 @@ DenseTensor Scale(const ContextT& dev_ctx, return dense_out; } -template -DenseTensor Add(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Add(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - -template -DenseTensor Subtract(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Subtract(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - -template -DenseTensor Divide(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Divide(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - -template -DenseTensor Multiply(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Multiply(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - template DenseTensor Conj(const ContextT& dev_ctx, const DenseTensor& x) { auto out_meta = UnchangedInferMeta(x.meta()); diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt index 7a785d86921aa3ca48b15b6824f7d9066c0a9aff..4c705767f4c2ff298ce648a63fdebb78d0f69f18 100644 --- a/paddle/pten/kernels/CMakeLists.txt +++ b/paddle/pten/kernels/CMakeLists.txt @@ -24,11 +24,17 @@ endif() # pten depends all pten kernel targets set_property(GLOBAL PROPERTY PTEN_KERNELS "") -set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory) -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function) +set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils) +set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta) +set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu) +if(WITH_GPU OR WITH_ROCM) + set(MATH_KERNEL_DEPS ${MATH_KERNEL_DEPS} pten_transpose_gpu) +endif() + # auto build kernel targets by cmake -register_kernels(DEPS ${COMMON_KERNEL_DEPS}) +register_kernels(EXCLUDES math_kernel DEPS ${COMMON_KERNEL_DEPS}) +kernel_library(math_kernel DEPS ${MATH_KERNEL_DEPS}) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) diff --git a/paddle/pten/kernels/cpu/math.cc b/paddle/pten/kernels/cpu/math.cc index ee01cf65d02e54d417f9f7536c8e935fcadb2dd4..b4642d475d56639aff2fde22e3b4ff8c37515d57 100644 --- a/paddle/pten/kernels/cpu/math.cc +++ b/paddle/pten/kernels/cpu/math.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,138 +12,4 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/pten/kernels/cpu/math.h" - -#include "paddle/pten/api/ext/dispatch.h" -#include "paddle/pten/kernels/hybird/cpu/elementwise.h" -#include "paddle/pten/kernels/hybird/eigen/reduce.h" -#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" -#include "paddle/pten/kernels/hybird/general/reduce_impl.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/complex.h" - -namespace pten { - -template -void Mean(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out) { - auto out_dtype = x.dtype(); - pten::general::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -template -void Divide(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - out->mutable_data(); - if (x.dims() == y.dims() && std::is_floating_point::value) { - SameDimsElementwiseCompute>()( - dev_ctx, x, y, out); - } else { - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::DivideFunctor(), out); - } else { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::InverseDivideFunctor(), out); - } - } -} - -template -void Sum(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out) { - pten::general::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -// Create the definition of Add -DEFINE_CPU_ELEMENTWISE_OP(Add) - -// Create the definition of Subtract -DEFINE_CPU_ELEMENTWISE_OP(Subtract) - -// Create the definition of Multiply -DEFINE_CPU_ELEMENTWISE_OP(Multiply) - -} // namespace pten - -using complex64 = ::paddle::platform::complex; -using complex128 = ::paddle::platform::complex; - -// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 -// using bfloat16 = ::paddle::platform::bfloat16; -PT_REGISTER_KERNEL(mean, CPU, ALL_LAYOUT, pten::Mean, float, double, bool) {} -PT_REGISTER_KERNEL(add, - CPU, - ALL_LAYOUT, - pten::Add, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PT_REGISTER_KERNEL(subtract, - CPU, - ALL_LAYOUT, - pten::Subtract, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PT_REGISTER_KERNEL(divide, - CPU, - ALL_LAYOUT, - pten::Divide, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PT_REGISTER_KERNEL(multiply, - CPU, - ALL_LAYOUT, - pten::Multiply, - float, - double, - int, - int64_t, - bool, - complex64, - complex128) {} -PT_REGISTER_KERNEL(sum, - CPU, - ALL_LAYOUT, - pten::Sum, - bool, - float, - double, - paddle::platform::float16, - int, - int64_t, - complex64, - complex128) { - kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); -} +namespace pten {} // namespace pten diff --git a/paddle/pten/kernels/cpu/math.h b/paddle/pten/kernels/cpu/math.h deleted file mode 100644 index 1a179218b4c4c92022f798a670b8327a8fb78c57..0000000000000000000000000000000000000000 --- a/paddle/pten/kernels/cpu/math.h +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/pten/backends/cpu/cpu_context.h" -#include "paddle/pten/common/scalar.h" -#include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/core/kernel_registry.h" - -namespace pten { - -template -void Mean(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out); - -template -void Add(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Subtract(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Divide(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Multiply(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); -template -void Sum(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out); - -} // namespace pten - -#define DEFINE_CPU_ELEMENTWISE_OP(name) \ - template \ - void name(const CPUContext& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - out->mutable_data(); \ - if (x.dims() == y.dims()) { \ - SameDimsElementwiseCompute< \ - general::SameDims##name##Functor>()( \ - dev_ctx, x, y, out); \ - } else { \ - auto x_dims = x.dims(); \ - auto y_dims = y.dims(); \ - if (x_dims.size() >= y_dims.size()) { \ - ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, general::name##Functor(), out); \ - } else { \ - ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, general::Inverse##name##Functor(), out); \ - } \ - } \ - } diff --git a/paddle/pten/kernels/cpu/math_kernel.cc b/paddle/pten/kernels/cpu/math_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..152d945144f6cd2723a4a1eaa0c94037dfade3ca --- /dev/null +++ b/paddle/pten/kernels/cpu/math_kernel.cc @@ -0,0 +1,178 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/pten/kernels/math_kernel.h" + +#include "paddle/pten/api/ext/dispatch.h" +#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/common/scalar.h" +#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/kernels/hybird/cpu/elementwise.h" +#include "paddle/pten/kernels/hybird/eigen/reduce.h" +#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" +#include "paddle/pten/kernels/hybird/general/reduce_impl.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/complex.h" + +namespace pten { + +#define DEFINE_CPU_ELEMENTWISE_OP(name) \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + out->mutable_data(); \ + if (x.dims() == y.dims()) { \ + SameDimsElementwiseCompute< \ + general::SameDims##name##Functor>()( \ + dev_ctx, x, y, out); \ + } else { \ + auto x_dims = x.dims(); \ + auto y_dims = y.dims(); \ + if (x_dims.size() >= y_dims.size()) { \ + ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, general::name##Functor(), out); \ + } else { \ + ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, general::Inverse##name##Functor(), out); \ + } \ + } \ + } + +template +void Mean(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + auto out_dtype = x.dtype(); + pten::general::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + out->mutable_data(); + if (x.dims() == y.dims() && std::is_floating_point::value) { + SameDimsElementwiseCompute>()( + dev_ctx, x, y, out); + } else { + auto x_dims = x.dims(); + auto y_dims = y.dims(); + if (x_dims.size() >= y_dims.size()) { + ElementwiseCompute, T>( + dev_ctx, x, y, axis, general::DivideFunctor(), out); + } else { + ElementwiseCompute, T>( + dev_ctx, x, y, axis, general::InverseDivideFunctor(), out); + } + } +} + +template +void Sum(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out) { + pten::general::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +// Create the definition of Add +DEFINE_CPU_ELEMENTWISE_OP(Add) + +// Create the definition of Subtract +DEFINE_CPU_ELEMENTWISE_OP(Subtract) + +// Create the definition of Multiply +DEFINE_CPU_ELEMENTWISE_OP(Multiply) + +} // namespace pten + +using complex64 = ::paddle::platform::complex; +using complex128 = ::paddle::platform::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::paddle::platform::bfloat16; +PT_REGISTER_CTX_KERNEL(mean, CPU, ALL_LAYOUT, pten::Mean, float, double, bool) { +} +PT_REGISTER_CTX_KERNEL(add, + CPU, + ALL_LAYOUT, + pten::AddKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(subtract, + CPU, + ALL_LAYOUT, + pten::SubtractKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(divide, + CPU, + ALL_LAYOUT, + pten::DivideKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(multiply, + CPU, + ALL_LAYOUT, + pten::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(sum, + CPU, + ALL_LAYOUT, + pten::Sum, + bool, + float, + double, + paddle::platform::float16, + int, + int64_t, + complex64, + complex128) { + kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); +} diff --git a/paddle/pten/kernels/gpu/CMakeLists.txt b/paddle/pten/kernels/gpu/CMakeLists.txt index 51c666947b2f2cbc36a56584b97b4ad471ffb7a9..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/paddle/pten/kernels/gpu/CMakeLists.txt +++ b/paddle/pten/kernels/gpu/CMakeLists.txt @@ -1,5 +0,0 @@ -if(WITH_GPU) - nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu cast_kernel copy_kernel) -elseif(WITH_ROCM) - hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu cast_kernel copy_kernel) -endif() diff --git a/paddle/pten/kernels/gpu/math.cu b/paddle/pten/kernels/gpu/math.cu deleted file mode 100644 index e02403ac426f2c9544ac7300aec4099d0d010417..0000000000000000000000000000000000000000 --- a/paddle/pten/kernels/gpu/math.cu +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/pten/kernels/gpu/math.h" - -#include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h" -#include "paddle/pten/kernels/hybird/cuda/reduce/reduce.h" -#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" -#include "paddle/pten/kernels/hybird/general/reduce_impl.h" - -#ifdef __NVCC__ -#include "cub/cub.cuh" -#endif -#ifdef __HIPCC__ -#include -namespace cub = hipcub; -#endif - -#include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/core/convert_utils.h" -#include "paddle/pten/core/kernel_registry.h" - -namespace kps = paddle::operators::kernel_primitives; - -namespace pten { - -/** - * Util Functors - */ - -template -struct DivideFunctor { - HOSTDEVICE explicit inline DivideFunctor(int n) - : n_inv(static_cast(1.0 / n)) {} - - HOSTDEVICE inline T operator()(const T& x) const { return x * n_inv; } - - private: - T n_inv; -}; - -/** - * Kernels - */ - -template -void Mean(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out) { - auto out_dtype = x.dtype(); - pten::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -// Create the definition of Add -DEFINE_CUDA_ELEMENTWISE_OP(Add) -// Create the definition of Subtract -DEFINE_CUDA_ELEMENTWISE_OP(Subtract) -// Create the definition of Multiply -DEFINE_CUDA_ELEMENTWISE_OP(Multiply) -// Create the definition of Divide -DEFINE_CUDA_ELEMENTWISE_OP(Divide) - -template -void Sum(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out) { - pten::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -} // namespace pten - -using float16 = paddle::platform::float16; -using complex64 = ::paddle::platform::complex; -using complex128 = ::paddle::platform::complex; - -PT_REGISTER_KERNEL( - mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool, float16) {} -PT_REGISTER_KERNEL(add, - GPU, - ALL_LAYOUT, - pten::Add, - float, - double, - int, - int64_t, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(subtract, - GPU, - ALL_LAYOUT, - pten::Subtract, - float, - double, - int, - int64_t, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(divide, - GPU, - ALL_LAYOUT, - pten::Divide, - float, - double, - int, - int64_t, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(multiply, - GPU, - ALL_LAYOUT, - pten::Multiply, - float, - double, - int, - int64_t, - bool, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(sum, - GPU, - ALL_LAYOUT, - pten::Sum, - bool, - float, - double, - float16, - int, - int64_t, - complex64, - complex128) { - kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); -} diff --git a/paddle/pten/kernels/gpu/math.h b/paddle/pten/kernels/gpu/math.h deleted file mode 100644 index c1d33a0fcdd092226d1aac89327dcb13ac88d0f6..0000000000000000000000000000000000000000 --- a/paddle/pten/kernels/gpu/math.h +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -// CUDA and HIP use same api -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - -#include "paddle/pten/backends/gpu/gpu_context.h" -#include "paddle/pten/common/scalar.h" -#include "paddle/pten/core/dense_tensor.h" - -namespace pten { - -template -void Mean(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out); - -template -void Add(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Subtract(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Divide(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Multiply(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Sum(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out); - -} // namespace pten - -#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ - template \ - void name(const GPUContext& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - std::vector inputs; \ - std::vector outputs; \ - inputs.emplace_back(&x); \ - inputs.emplace_back(&y); \ - outputs.emplace_back(out); \ - out->mutable_data(); \ - LaunchElementwiseCudaKernel( \ - dev_ctx, inputs, &outputs, axis, general::name##Functor()); \ - } - -#endif diff --git a/paddle/pten/kernels/gpu/math_kernel.cu b/paddle/pten/kernels/gpu/math_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..636d0f16b0d711328eb7f384722f60a178b11e60 --- /dev/null +++ b/paddle/pten/kernels/gpu/math_kernel.cu @@ -0,0 +1,177 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/kernels/math_kernel.h" + +#include "paddle/pten/backends/gpu/gpu_context.h" +#include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h" +#include "paddle/pten/kernels/hybird/cuda/reduce/reduce.h" +#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" +#include "paddle/pten/kernels/hybird/general/reduce_impl.h" + +#ifdef __NVCC__ +#include "cub/cub.cuh" +#endif +#ifdef __HIPCC__ +#include +namespace cub = hipcub; +#endif + +#include "paddle/fluid/platform/complex.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/pten/api/lib/utils/tensor_utils.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/kernel_registry.h" + +namespace kps = paddle::operators::kernel_primitives; + +namespace pten { + +#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + std::vector inputs; \ + std::vector outputs; \ + inputs.emplace_back(&x); \ + inputs.emplace_back(&y); \ + outputs.emplace_back(out); \ + out->mutable_data(); \ + LaunchElementwiseCudaKernel( \ + dev_ctx, inputs, &outputs, axis, general::name##Functor()); \ + } + +/** + * Util Functors + */ + +template +struct DivideFunctor { + HOSTDEVICE explicit inline DivideFunctor(int n) + : n_inv(static_cast(1.0 / n)) {} + + HOSTDEVICE inline T operator()(const T& x) const { return x * n_inv; } + + private: + T n_inv; +}; + +/** + * Kernels + */ + +template +void Mean(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + auto out_dtype = x.dtype(); + pten::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +// Create the definition of Add +DEFINE_CUDA_ELEMENTWISE_OP(Add) +// Create the definition of Subtract +DEFINE_CUDA_ELEMENTWISE_OP(Subtract) +// Create the definition of Multiply +DEFINE_CUDA_ELEMENTWISE_OP(Multiply) +// Create the definition of Divide +DEFINE_CUDA_ELEMENTWISE_OP(Divide) + +template +void Sum(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out) { + pten::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +} // namespace pten + +using float16 = paddle::platform::float16; +using complex64 = ::paddle::platform::complex; +using complex128 = ::paddle::platform::complex; + +PT_REGISTER_CTX_KERNEL( + mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool, float16) {} +PT_REGISTER_CTX_KERNEL(add, + GPU, + ALL_LAYOUT, + pten::AddKernel, + float, + double, + int, + int64_t, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(subtract, + GPU, + ALL_LAYOUT, + pten::SubtractKernel, + float, + double, + int, + int64_t, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(divide, + GPU, + ALL_LAYOUT, + pten::DivideKernel, + float, + double, + int, + int64_t, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(multiply, + GPU, + ALL_LAYOUT, + pten::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(sum, + GPU, + ALL_LAYOUT, + pten::Sum, + bool, + float, + double, + float16, + int, + int64_t, + complex64, + complex128) { + kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); +} diff --git a/paddle/pten/kernels/math_kernel.h b/paddle/pten/kernels/math_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..2968aa3524a9f29086b108482b60b4ab3eecb3b6 --- /dev/null +++ b/paddle/pten/kernels/math_kernel.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/api/lib/utils/storage.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/infermeta.h" + +namespace pten { + +template +void Mean(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out); + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void Sum(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out); + +template +DenseTensor Add(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + AddKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +template +DenseTensor Subtract(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + SubtractKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +template +DenseTensor Divide(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + DivideKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +template +DenseTensor Multiply(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + MultiplyKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +} // namespace pten diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index f12a2d48e6b2b876fe0c8bba7de6461aa41313bd..bd09ecb770a5d59b114e2ba883d3a8e6107121cd 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/core/dense_tensor.h"