From e76087adbc62b645b44e9eec994749c26a5fd8f0 Mon Sep 17 00:00:00 2001 From: YuanRisheng Date: Fri, 31 Dec 2021 10:09:59 +0800 Subject: [PATCH] =?UTF-8?q?[Pten]Move=20math=20to=20new=20directory=20and?= =?UTF-8?q?=20change=20=E3=80=8Cmath=E3=80=8D=20to=20=E3=80=8Cmath=5Fkerne?= =?UTF-8?q?l=E3=80=8D=20(#38604)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * change 'math' to 'math_kernel' * fix compile bugs * merge develop * fix compile bugs --- paddle/fluid/operators/cholesky_solve_op.h | 4 +- .../elementwise/elementwise_add_op.h | 4 +- .../elementwise/elementwise_div_op.h | 4 +- .../elementwise/elementwise_mul_op.cu | 3 +- .../elementwise/elementwise_mul_op.h | 5 +- .../elementwise/elementwise_sub_op.h | 5 +- paddle/pten/CMakeLists.txt | 4 - paddle/pten/api/lib/kernel_declare.h | 6 - paddle/pten/include/math.h | 64 +------ paddle/pten/kernels/CMakeLists.txt | 12 +- paddle/pten/kernels/cpu/math.cc | 138 +------------- paddle/pten/kernels/cpu/math.h | 93 --------- paddle/pten/kernels/cpu/math_kernel.cc | 178 ++++++++++++++++++ paddle/pten/kernels/gpu/CMakeLists.txt | 5 - paddle/pten/kernels/gpu/math.cu | 159 ---------------- paddle/pten/kernels/gpu/math.h | 90 --------- paddle/pten/kernels/gpu/math_kernel.cu | 177 +++++++++++++++++ paddle/pten/kernels/math_kernel.h | 124 ++++++++++++ .../tests/kernels/test_elementwise_dev_api.cc | 2 +- 19 files changed, 509 insertions(+), 568 deletions(-) delete mode 100644 paddle/pten/kernels/cpu/math.h create mode 100644 paddle/pten/kernels/cpu/math_kernel.cc delete mode 100644 paddle/pten/kernels/gpu/math.cu delete mode 100644 paddle/pten/kernels/gpu/math.h create mode 100644 paddle/pten/kernels/gpu/math_kernel.cu create mode 100644 paddle/pten/kernels/math_kernel.h diff --git a/paddle/fluid/operators/cholesky_solve_op.h b/paddle/fluid/operators/cholesky_solve_op.h index f3b0056165..94b68bff8f 100644 --- a/paddle/fluid/operators/cholesky_solve_op.h +++ b/paddle/fluid/operators/cholesky_solve_op.h @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/fluid/operators/svd_helper.h" #include "paddle/fluid/operators/triangular_solve_op.h" #include "paddle/fluid/platform/complex.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { // namespace operators @@ -205,7 +205,7 @@ class CholeskySolveGradKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(commonterm); auto pt_y = paddle::experimental::MakePtenDenseTensor(commonterm_conj); auto pt_z = paddle::experimental::MakePtenDenseTensor(commonterm); - pten::Add(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get()); + pten::AddKernel(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get()); auto mat_dim_u = math::CreateMatrixDescriptor(u_bst.dims(), 0, false); auto mat_dim_c = diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.h b/paddle/fluid/operators/elementwise/elementwise_add_op.h index a4567beeb4..d6d79d166d 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.h @@ -25,7 +25,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -68,7 +68,7 @@ class ElementwiseAddKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z); - pten::Add(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::AddKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); } }; diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h index f3ba5050c4..c886644bbd 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h @@ -28,7 +28,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -62,7 +62,7 @@ class ElementwiseDivKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z); - pten::Divide(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::DivideKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); } }; diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu index e131bc4974..12e0062a69 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cu +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cu @@ -57,7 +57,8 @@ class ElementwiseMulKernel auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y_lod); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod); - pten::Multiply(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::MultiplyKernel(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); } else { PADDLE_THROW(platform::errors::InvalidArgument( "X's type[%s] is not supported by elementwise_op. X's type should be " diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index 8b43f82e6b..3b0f072572 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -24,7 +24,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -129,7 +129,8 @@ class ElementwiseMulKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod); - pten::Multiply(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::MultiplyKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); } else { PADDLE_THROW(platform::errors::InvalidArgument( "X's type[%s] is not supported by elementwise_op. X's type should be " diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.h b/paddle/fluid/operators/elementwise/elementwise_sub_op.h index 09a33788d4..6a51d7c2a4 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.h @@ -22,7 +22,7 @@ limitations under the License. */ // only can include the headers in paddle/pten/include dirs #include "paddle/pten/api/lib/utils/tensor_utils.h" #include "paddle/pten/include/core.h" -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" namespace paddle { namespace operators { @@ -56,7 +56,8 @@ class ElementwiseSubKernel : public framework::OpKernel { auto pt_x = paddle::experimental::MakePtenDenseTensor(*x); auto pt_y = paddle::experimental::MakePtenDenseTensor(*y); auto pt_z = paddle::experimental::MakePtenDenseTensor(*z); - pten::Subtract(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get()); + pten::SubtractKernel(dev_ctx, *pt_x.get(), *pt_y.get(), axis, + pt_z.get()); } }; diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt index 97be4c19c9..05b321c50c 100644 --- a/paddle/pten/CMakeLists.txt +++ b/paddle/pten/CMakeLists.txt @@ -28,9 +28,5 @@ get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS) # keep this message for debug, remove it later if needless message(STATUS "All standard pten kernels: ${pten_kernels}") set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels}) -set(PTEN_DEPS ${PTEN_DEPS} math_cpu) -if(WITH_GPU OR WITH_ROCM) - set(PTEN_DEPS ${PTEN_DEPS} math_gpu) -endif() cc_library(pten SRCS all.cc DEPS ${PTEN_DEPS}) diff --git a/paddle/pten/api/lib/kernel_declare.h b/paddle/pten/api/lib/kernel_declare.h index 484063df47..4d3143ef09 100644 --- a/paddle/pten/api/lib/kernel_declare.h +++ b/paddle/pten/api/lib/kernel_declare.h @@ -19,9 +19,3 @@ limitations under the License. */ // TODO(chenweihang) After the kernel is split into a single file, // the kernel declare statement is automatically generated according to the // file name of the kernel, and this header file will be removed - -PT_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PT_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT); -#endif diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h index 876834cea7..9abfa297a9 100644 --- a/paddle/pten/include/math.h +++ b/paddle/pten/include/math.h @@ -18,8 +18,7 @@ limitations under the License. */ #include "paddle/pten/api/lib/utils/storage.h" #include "paddle/pten/include/infermeta.h" #include "paddle/pten/kernels/complex_kernel.h" -#include "paddle/pten/kernels/cpu/math.h" -#include "paddle/pten/kernels/gpu/math.h" +#include "paddle/pten/kernels/math_kernel.h" #include "paddle/pten/kernels/scale_kernel.h" namespace pten { @@ -46,7 +45,7 @@ DenseTensor Mean(const ContextT& dev_ctx, dev_ctx.GetPlace()), std::move(out_meta)); bool reduce_all = false; - Mean(dev_ctx, x, axis, keep_dim, reduce_all, &dense_out); + Mean(dev_ctx, x, axis, keep_dim, reduce_all, &dense_out); return dense_out; } @@ -66,7 +65,8 @@ DenseTensor Sum(const ContextT& dev_ctx, // so use default value(false) is OK. bool reduce_all = false; - Sum(dev_ctx, x, axis, keep_dim, reduce_all, out_meta.dtype, &dense_out); + Sum( + dev_ctx, x, axis, keep_dim, reduce_all, out_meta.dtype, &dense_out); return dense_out; } @@ -85,62 +85,6 @@ DenseTensor Scale(const ContextT& dev_ctx, return dense_out; } -template -DenseTensor Add(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Add(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - -template -DenseTensor Subtract(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Subtract(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - -template -DenseTensor Divide(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Divide(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - -template -DenseTensor Multiply(const ContextT& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis) { - auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); - pten::DenseTensor dense_out( - pten::make_intrusive( - dev_ctx.GetPlace()), - std::move(out_meta)); - Multiply(dev_ctx, x, y, axis, &dense_out); - return dense_out; -} - template DenseTensor Conj(const ContextT& dev_ctx, const DenseTensor& x) { auto out_meta = UnchangedInferMeta(x.meta()); diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt index 7a785d8692..4c705767f4 100644 --- a/paddle/pten/kernels/CMakeLists.txt +++ b/paddle/pten/kernels/CMakeLists.txt @@ -24,11 +24,17 @@ endif() # pten depends all pten kernel targets set_property(GLOBAL PROPERTY PTEN_KERNELS "") -set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory) -set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function) +set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils) +set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas) set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta) +set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu) +if(WITH_GPU OR WITH_ROCM) + set(MATH_KERNEL_DEPS ${MATH_KERNEL_DEPS} pten_transpose_gpu) +endif() + # auto build kernel targets by cmake -register_kernels(DEPS ${COMMON_KERNEL_DEPS}) +register_kernels(EXCLUDES math_kernel DEPS ${COMMON_KERNEL_DEPS}) +kernel_library(math_kernel DEPS ${MATH_KERNEL_DEPS}) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) diff --git a/paddle/pten/kernels/cpu/math.cc b/paddle/pten/kernels/cpu/math.cc index ee01cf65d0..b4642d475d 100644 --- a/paddle/pten/kernels/cpu/math.cc +++ b/paddle/pten/kernels/cpu/math.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -12,138 +12,4 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/pten/kernels/cpu/math.h" - -#include "paddle/pten/api/ext/dispatch.h" -#include "paddle/pten/kernels/hybird/cpu/elementwise.h" -#include "paddle/pten/kernels/hybird/eigen/reduce.h" -#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" -#include "paddle/pten/kernels/hybird/general/reduce_impl.h" - -// See Note [ Why still include the fluid headers? ] -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/platform/bfloat16.h" -#include "paddle/fluid/platform/complex.h" - -namespace pten { - -template -void Mean(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out) { - auto out_dtype = x.dtype(); - pten::general::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -template -void Divide(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out) { - // allocate memory for out - out->mutable_data(); - if (x.dims() == y.dims() && std::is_floating_point::value) { - SameDimsElementwiseCompute>()( - dev_ctx, x, y, out); - } else { - auto x_dims = x.dims(); - auto y_dims = y.dims(); - if (x_dims.size() >= y_dims.size()) { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::DivideFunctor(), out); - } else { - ElementwiseCompute, T>( - dev_ctx, x, y, axis, general::InverseDivideFunctor(), out); - } - } -} - -template -void Sum(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out) { - pten::general::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -// Create the definition of Add -DEFINE_CPU_ELEMENTWISE_OP(Add) - -// Create the definition of Subtract -DEFINE_CPU_ELEMENTWISE_OP(Subtract) - -// Create the definition of Multiply -DEFINE_CPU_ELEMENTWISE_OP(Multiply) - -} // namespace pten - -using complex64 = ::paddle::platform::complex; -using complex128 = ::paddle::platform::complex; - -// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 -// using bfloat16 = ::paddle::platform::bfloat16; -PT_REGISTER_KERNEL(mean, CPU, ALL_LAYOUT, pten::Mean, float, double, bool) {} -PT_REGISTER_KERNEL(add, - CPU, - ALL_LAYOUT, - pten::Add, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PT_REGISTER_KERNEL(subtract, - CPU, - ALL_LAYOUT, - pten::Subtract, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PT_REGISTER_KERNEL(divide, - CPU, - ALL_LAYOUT, - pten::Divide, - float, - double, - int, - int64_t, - complex64, - complex128) {} -PT_REGISTER_KERNEL(multiply, - CPU, - ALL_LAYOUT, - pten::Multiply, - float, - double, - int, - int64_t, - bool, - complex64, - complex128) {} -PT_REGISTER_KERNEL(sum, - CPU, - ALL_LAYOUT, - pten::Sum, - bool, - float, - double, - paddle::platform::float16, - int, - int64_t, - complex64, - complex128) { - kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); -} +namespace pten {} // namespace pten diff --git a/paddle/pten/kernels/cpu/math.h b/paddle/pten/kernels/cpu/math.h deleted file mode 100644 index 1a179218b4..0000000000 --- a/paddle/pten/kernels/cpu/math.h +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/pten/backends/cpu/cpu_context.h" -#include "paddle/pten/common/scalar.h" -#include "paddle/pten/core/dense_tensor.h" -#include "paddle/pten/core/kernel_registry.h" - -namespace pten { - -template -void Mean(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out); - -template -void Add(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Subtract(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Divide(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Multiply(const CPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); -template -void Sum(const CPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out); - -} // namespace pten - -#define DEFINE_CPU_ELEMENTWISE_OP(name) \ - template \ - void name(const CPUContext& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - out->mutable_data(); \ - if (x.dims() == y.dims()) { \ - SameDimsElementwiseCompute< \ - general::SameDims##name##Functor>()( \ - dev_ctx, x, y, out); \ - } else { \ - auto x_dims = x.dims(); \ - auto y_dims = y.dims(); \ - if (x_dims.size() >= y_dims.size()) { \ - ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, general::name##Functor(), out); \ - } else { \ - ElementwiseCompute, T>( \ - dev_ctx, x, y, axis, general::Inverse##name##Functor(), out); \ - } \ - } \ - } diff --git a/paddle/pten/kernels/cpu/math_kernel.cc b/paddle/pten/kernels/cpu/math_kernel.cc new file mode 100644 index 0000000000..152d945144 --- /dev/null +++ b/paddle/pten/kernels/cpu/math_kernel.cc @@ -0,0 +1,178 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/pten/kernels/math_kernel.h" + +#include "paddle/pten/api/ext/dispatch.h" +#include "paddle/pten/backends/cpu/cpu_context.h" +#include "paddle/pten/common/scalar.h" +#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/kernels/hybird/cpu/elementwise.h" +#include "paddle/pten/kernels/hybird/eigen/reduce.h" +#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" +#include "paddle/pten/kernels/hybird/general/reduce_impl.h" + +// See Note [ Why still include the fluid headers? ] +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/platform/bfloat16.h" +#include "paddle/fluid/platform/complex.h" + +namespace pten { + +#define DEFINE_CPU_ELEMENTWISE_OP(name) \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + out->mutable_data(); \ + if (x.dims() == y.dims()) { \ + SameDimsElementwiseCompute< \ + general::SameDims##name##Functor>()( \ + dev_ctx, x, y, out); \ + } else { \ + auto x_dims = x.dims(); \ + auto y_dims = y.dims(); \ + if (x_dims.size() >= y_dims.size()) { \ + ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, general::name##Functor(), out); \ + } else { \ + ElementwiseCompute, T>( \ + dev_ctx, x, y, axis, general::Inverse##name##Functor(), out); \ + } \ + } \ + } + +template +void Mean(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + auto out_dtype = x.dtype(); + pten::general::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + out->mutable_data(); + if (x.dims() == y.dims() && std::is_floating_point::value) { + SameDimsElementwiseCompute>()( + dev_ctx, x, y, out); + } else { + auto x_dims = x.dims(); + auto y_dims = y.dims(); + if (x_dims.size() >= y_dims.size()) { + ElementwiseCompute, T>( + dev_ctx, x, y, axis, general::DivideFunctor(), out); + } else { + ElementwiseCompute, T>( + dev_ctx, x, y, axis, general::InverseDivideFunctor(), out); + } + } +} + +template +void Sum(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out) { + pten::general::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +// Create the definition of Add +DEFINE_CPU_ELEMENTWISE_OP(Add) + +// Create the definition of Subtract +DEFINE_CPU_ELEMENTWISE_OP(Subtract) + +// Create the definition of Multiply +DEFINE_CPU_ELEMENTWISE_OP(Multiply) + +} // namespace pten + +using complex64 = ::paddle::platform::complex; +using complex128 = ::paddle::platform::complex; + +// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16 +// using bfloat16 = ::paddle::platform::bfloat16; +PT_REGISTER_CTX_KERNEL(mean, CPU, ALL_LAYOUT, pten::Mean, float, double, bool) { +} +PT_REGISTER_CTX_KERNEL(add, + CPU, + ALL_LAYOUT, + pten::AddKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(subtract, + CPU, + ALL_LAYOUT, + pten::SubtractKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(divide, + CPU, + ALL_LAYOUT, + pten::DivideKernel, + float, + double, + int, + int64_t, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(multiply, + CPU, + ALL_LAYOUT, + pten::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(sum, + CPU, + ALL_LAYOUT, + pten::Sum, + bool, + float, + double, + paddle::platform::float16, + int, + int64_t, + complex64, + complex128) { + kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); +} diff --git a/paddle/pten/kernels/gpu/CMakeLists.txt b/paddle/pten/kernels/gpu/CMakeLists.txt index 51c666947b..e69de29bb2 100644 --- a/paddle/pten/kernels/gpu/CMakeLists.txt +++ b/paddle/pten/kernels/gpu/CMakeLists.txt @@ -1,5 +0,0 @@ -if(WITH_GPU) - nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu cast_kernel copy_kernel) -elseif(WITH_ROCM) - hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu cast_kernel copy_kernel) -endif() diff --git a/paddle/pten/kernels/gpu/math.cu b/paddle/pten/kernels/gpu/math.cu deleted file mode 100644 index e02403ac42..0000000000 --- a/paddle/pten/kernels/gpu/math.cu +++ /dev/null @@ -1,159 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/pten/kernels/gpu/math.h" - -#include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h" -#include "paddle/pten/kernels/hybird/cuda/reduce/reduce.h" -#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" -#include "paddle/pten/kernels/hybird/general/reduce_impl.h" - -#ifdef __NVCC__ -#include "cub/cub.cuh" -#endif -#ifdef __HIPCC__ -#include -namespace cub = hipcub; -#endif - -#include "paddle/fluid/platform/complex.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/float16.h" -#include "paddle/pten/api/lib/utils/tensor_utils.h" -#include "paddle/pten/core/convert_utils.h" -#include "paddle/pten/core/kernel_registry.h" - -namespace kps = paddle::operators::kernel_primitives; - -namespace pten { - -/** - * Util Functors - */ - -template -struct DivideFunctor { - HOSTDEVICE explicit inline DivideFunctor(int n) - : n_inv(static_cast(1.0 / n)) {} - - HOSTDEVICE inline T operator()(const T& x) const { return x * n_inv; } - - private: - T n_inv; -}; - -/** - * Kernels - */ - -template -void Mean(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out) { - auto out_dtype = x.dtype(); - pten::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -// Create the definition of Add -DEFINE_CUDA_ELEMENTWISE_OP(Add) -// Create the definition of Subtract -DEFINE_CUDA_ELEMENTWISE_OP(Subtract) -// Create the definition of Multiply -DEFINE_CUDA_ELEMENTWISE_OP(Multiply) -// Create the definition of Divide -DEFINE_CUDA_ELEMENTWISE_OP(Divide) - -template -void Sum(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out) { - pten::Reduce( - dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); -} - -} // namespace pten - -using float16 = paddle::platform::float16; -using complex64 = ::paddle::platform::complex; -using complex128 = ::paddle::platform::complex; - -PT_REGISTER_KERNEL( - mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool, float16) {} -PT_REGISTER_KERNEL(add, - GPU, - ALL_LAYOUT, - pten::Add, - float, - double, - int, - int64_t, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(subtract, - GPU, - ALL_LAYOUT, - pten::Subtract, - float, - double, - int, - int64_t, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(divide, - GPU, - ALL_LAYOUT, - pten::Divide, - float, - double, - int, - int64_t, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(multiply, - GPU, - ALL_LAYOUT, - pten::Multiply, - float, - double, - int, - int64_t, - bool, - float16, - complex64, - complex128) {} -PT_REGISTER_KERNEL(sum, - GPU, - ALL_LAYOUT, - pten::Sum, - bool, - float, - double, - float16, - int, - int64_t, - complex64, - complex128) { - kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); -} diff --git a/paddle/pten/kernels/gpu/math.h b/paddle/pten/kernels/gpu/math.h deleted file mode 100644 index c1d33a0fcd..0000000000 --- a/paddle/pten/kernels/gpu/math.h +++ /dev/null @@ -1,90 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -// CUDA and HIP use same api -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - -#include "paddle/pten/backends/gpu/gpu_context.h" -#include "paddle/pten/common/scalar.h" -#include "paddle/pten/core/dense_tensor.h" - -namespace pten { - -template -void Mean(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DenseTensor* out); - -template -void Add(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Subtract(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Divide(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Multiply(const GPUContext& dev_ctx, - const DenseTensor& x, - const DenseTensor& y, - int axis, - DenseTensor* out); - -template -void Sum(const GPUContext& dev_ctx, - const DenseTensor& x, - const std::vector& dims, - bool keep_dim, - bool reduce_all, - DataType out_dtype, - DenseTensor* out); - -} // namespace pten - -#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ - template \ - void name(const GPUContext& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& y, \ - int axis, \ - DenseTensor* out) { \ - std::vector inputs; \ - std::vector outputs; \ - inputs.emplace_back(&x); \ - inputs.emplace_back(&y); \ - outputs.emplace_back(out); \ - out->mutable_data(); \ - LaunchElementwiseCudaKernel( \ - dev_ctx, inputs, &outputs, axis, general::name##Functor()); \ - } - -#endif diff --git a/paddle/pten/kernels/gpu/math_kernel.cu b/paddle/pten/kernels/gpu/math_kernel.cu new file mode 100644 index 0000000000..636d0f16b0 --- /dev/null +++ b/paddle/pten/kernels/gpu/math_kernel.cu @@ -0,0 +1,177 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/kernels/math_kernel.h" + +#include "paddle/pten/backends/gpu/gpu_context.h" +#include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h" +#include "paddle/pten/kernels/hybird/cuda/reduce/reduce.h" +#include "paddle/pten/kernels/hybird/general/elementwise_functor.h" +#include "paddle/pten/kernels/hybird/general/reduce_impl.h" + +#ifdef __NVCC__ +#include "cub/cub.cuh" +#endif +#ifdef __HIPCC__ +#include +namespace cub = hipcub; +#endif + +#include "paddle/fluid/platform/complex.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/pten/api/lib/utils/tensor_utils.h" +#include "paddle/pten/core/convert_utils.h" +#include "paddle/pten/core/kernel_registry.h" + +namespace kps = paddle::operators::kernel_primitives; + +namespace pten { + +#define DEFINE_CUDA_ELEMENTWISE_OP(name) \ + template \ + void name##Kernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& y, \ + int axis, \ + DenseTensor* out) { \ + std::vector inputs; \ + std::vector outputs; \ + inputs.emplace_back(&x); \ + inputs.emplace_back(&y); \ + outputs.emplace_back(out); \ + out->mutable_data(); \ + LaunchElementwiseCudaKernel( \ + dev_ctx, inputs, &outputs, axis, general::name##Functor()); \ + } + +/** + * Util Functors + */ + +template +struct DivideFunctor { + HOSTDEVICE explicit inline DivideFunctor(int n) + : n_inv(static_cast(1.0 / n)) {} + + HOSTDEVICE inline T operator()(const T& x) const { return x * n_inv; } + + private: + T n_inv; +}; + +/** + * Kernels + */ + +template +void Mean(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out) { + auto out_dtype = x.dtype(); + pten::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +// Create the definition of Add +DEFINE_CUDA_ELEMENTWISE_OP(Add) +// Create the definition of Subtract +DEFINE_CUDA_ELEMENTWISE_OP(Subtract) +// Create the definition of Multiply +DEFINE_CUDA_ELEMENTWISE_OP(Multiply) +// Create the definition of Divide +DEFINE_CUDA_ELEMENTWISE_OP(Divide) + +template +void Sum(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out) { + pten::Reduce( + dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out); +} + +} // namespace pten + +using float16 = paddle::platform::float16; +using complex64 = ::paddle::platform::complex; +using complex128 = ::paddle::platform::complex; + +PT_REGISTER_CTX_KERNEL( + mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool, float16) {} +PT_REGISTER_CTX_KERNEL(add, + GPU, + ALL_LAYOUT, + pten::AddKernel, + float, + double, + int, + int64_t, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(subtract, + GPU, + ALL_LAYOUT, + pten::SubtractKernel, + float, + double, + int, + int64_t, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(divide, + GPU, + ALL_LAYOUT, + pten::DivideKernel, + float, + double, + int, + int64_t, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(multiply, + GPU, + ALL_LAYOUT, + pten::MultiplyKernel, + float, + double, + int, + int64_t, + bool, + float16, + complex64, + complex128) {} +PT_REGISTER_CTX_KERNEL(sum, + GPU, + ALL_LAYOUT, + pten::Sum, + bool, + float, + double, + float16, + int, + int64_t, + complex64, + complex128) { + kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED); +} diff --git a/paddle/pten/kernels/math_kernel.h b/paddle/pten/kernels/math_kernel.h new file mode 100644 index 0000000000..2968aa3524 --- /dev/null +++ b/paddle/pten/kernels/math_kernel.h @@ -0,0 +1,124 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/api/lib/utils/storage.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/include/infermeta.h" + +namespace pten { + +template +void Mean(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DenseTensor* out); + +template +void AddKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void SubtractKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void DivideKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MultiplyKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void Sum(const Context& dev_ctx, + const DenseTensor& x, + const std::vector& dims, + bool keep_dim, + bool reduce_all, + DataType out_dtype, + DenseTensor* out); + +template +DenseTensor Add(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + AddKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +template +DenseTensor Subtract(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + SubtractKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +template +DenseTensor Divide(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + DivideKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +template +DenseTensor Multiply(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis) { + auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis); + pten::DenseTensor dense_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + MultiplyKernel(dev_ctx, x, y, axis, &dense_out); + return dense_out; +} + +} // namespace pten diff --git a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc index f12a2d48e6..bd09ecb770 100644 --- a/paddle/pten/tests/kernels/test_elementwise_dev_api.cc +++ b/paddle/pten/tests/kernels/test_elementwise_dev_api.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "paddle/pten/include/math.h" +#include "paddle/pten/kernels/math_kernel.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/core/dense_tensor.h" -- GitLab