Unverified · Commit e76087ad · Authored by YuanRisheng · Committed by GitHub

[Pten] Move math to new directory and change "math" to "math_kernel" (#38604)

* change 'math' to 'math_kernel'

* fix compile bugs

* merge develop

* fix compile bugs
Parent 4e21457d
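The rename is mechanical at the call sites in the operator files below: the functional-style entry points gain a Kernel suffix, and the umbrella header paddle/pten/include/math.h is replaced by paddle/pten/kernels/math_kernel.h. A minimal before/after sketch (illustrative only; dev_ctx, pt_x, pt_y, pt_z stand for objects the operator kernels below already construct):

    // Before: #include "paddle/pten/include/math.h"
    pten::Add<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());

    // After: #include "paddle/pten/kernels/math_kernel.h"
    pten::AddKernel<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());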
@@ -20,7 +20,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/svd_helper.h"
 #include "paddle/fluid/operators/triangular_solve_op.h"
 #include "paddle/fluid/platform/complex.h"
-#include "paddle/pten/include/math.h"
+#include "paddle/pten/kernels/math_kernel.h"

 namespace paddle {
 namespace operators {
@@ -205,7 +205,7 @@ class CholeskySolveGradKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(commonterm);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(commonterm_conj);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(commonterm);
-    pten::Add<T>(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get());
+    pten::AddKernel<T>(dev_ctx, *pt_x.get(), *pt_y.get(), -1, pt_z.get());
     auto mat_dim_u = math::CreateMatrixDescriptor(u_bst.dims(), 0, false);
     auto mat_dim_c =
......
@@ -25,7 +25,7 @@ limitations under the License. */
 // only can include the headers in paddle/pten/include dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
-#include "paddle/pten/include/math.h"
+#include "paddle/pten/kernels/math_kernel.h"

 namespace paddle {
 namespace operators {
@@ -68,7 +68,7 @@ class ElementwiseAddKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
-    pten::Add<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
+    pten::AddKernel<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
   }
 };
......
@@ -28,7 +28,7 @@ limitations under the License. */
 // only can include the headers in paddle/pten/include dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
-#include "paddle/pten/include/math.h"
+#include "paddle/pten/kernels/math_kernel.h"

 namespace paddle {
 namespace operators {
@@ -62,7 +62,7 @@ class ElementwiseDivKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
-    pten::Divide<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
+    pten::DivideKernel<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
   }
 };
......
@@ -57,7 +57,8 @@ class ElementwiseMulKernel<platform::CUDADeviceContext, T>
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y_lod);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod);
-    pten::Multiply<T>(cuda_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
+    pten::MultiplyKernel<T>(cuda_ctx, *pt_x.get(), *pt_y.get(), axis,
+                            pt_z.get());
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "X's type[%s] is not supported by elementwise_op. X's type should be "
......
@@ -24,7 +24,7 @@ limitations under the License. */
 // only can include the headers in paddle/pten/include dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
-#include "paddle/pten/include/math.h"
+#include "paddle/pten/kernels/math_kernel.h"

 namespace paddle {
 namespace operators {
@@ -129,7 +129,8 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x_lod);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z_lod);
-    pten::Multiply<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
+    pten::MultiplyKernel<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
+                            pt_z.get());
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "X's type[%s] is not supported by elementwise_op. X's type should be "
......
@@ -22,7 +22,7 @@ limitations under the License. */
 // only can include the headers in paddle/pten/include dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
 #include "paddle/pten/include/core.h"
-#include "paddle/pten/include/math.h"
+#include "paddle/pten/kernels/math_kernel.h"

 namespace paddle {
 namespace operators {
@@ -56,7 +56,8 @@ class ElementwiseSubKernel : public framework::OpKernel<T> {
     auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
     auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
     auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
-    pten::Subtract<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis, pt_z.get());
+    pten::SubtractKernel<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
+                            pt_z.get());
   }
 };
......
@@ -28,9 +28,5 @@ get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS)
 # keep this message for debug, remove it later if needless
 message(STATUS "All standard pten kernels: ${pten_kernels}")
 set(PTEN_DEPS ${PTEN_DEPS} ${pten_kernels})
-set(PTEN_DEPS ${PTEN_DEPS} math_cpu)
-if(WITH_GPU OR WITH_ROCM)
-  set(PTEN_DEPS ${PTEN_DEPS} math_gpu)
-endif()

 cc_library(pten SRCS all.cc DEPS ${PTEN_DEPS})
@@ -19,9 +19,3 @@ limitations under the License. */
 // TODO(chenweihang) After the kernel is split into a single file,
 // the kernel declare statement is automatically generated according to the
 // file name of the kernel, and this header file will be removed
-
-PT_DECLARE_KERNEL(mean, CPU, ALL_LAYOUT);
-
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-PT_DECLARE_KERNEL(mean, GPU, ALL_LAYOUT);
-#endif
@@ -18,8 +18,7 @@ limitations under the License. */
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
 #include "paddle/pten/kernels/complex_kernel.h"
-#include "paddle/pten/kernels/cpu/math.h"
-#include "paddle/pten/kernels/gpu/math.h"
+#include "paddle/pten/kernels/math_kernel.h"
 #include "paddle/pten/kernels/scale_kernel.h"

 namespace pten {
@@ -46,7 +45,7 @@ DenseTensor Mean(const ContextT& dev_ctx,
           dev_ctx.GetPlace()),
       std::move(out_meta));
   bool reduce_all = false;
-  Mean<T>(dev_ctx, x, axis, keep_dim, reduce_all, &dense_out);
+  Mean<T, ContextT>(dev_ctx, x, axis, keep_dim, reduce_all, &dense_out);
   return dense_out;
 }
@@ -66,7 +65,8 @@ DenseTensor Sum(const ContextT& dev_ctx,
   // so use default value(false) is OK.
   bool reduce_all = false;
-  Sum<T>(dev_ctx, x, axis, keep_dim, reduce_all, out_meta.dtype, &dense_out);
+  Sum<T, ContextT>(
+      dev_ctx, x, axis, keep_dim, reduce_all, out_meta.dtype, &dense_out);
   return dense_out;
 }
@@ -85,62 +85,6 @@ DenseTensor Scale(const ContextT& dev_ctx,
   return dense_out;
 }

-template <typename T, typename ContextT>
-DenseTensor Add(const ContextT& dev_ctx,
-                const DenseTensor& x,
-                const DenseTensor& y,
-                int axis) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Add<T>(dev_ctx, x, y, axis, &dense_out);
-  return dense_out;
-}
-
-template <typename T, typename ContextT>
-DenseTensor Subtract(const ContextT& dev_ctx,
-                     const DenseTensor& x,
-                     const DenseTensor& y,
-                     int axis) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Subtract<T>(dev_ctx, x, y, axis, &dense_out);
-  return dense_out;
-}
-
-template <typename T, typename ContextT>
-DenseTensor Divide(const ContextT& dev_ctx,
-                   const DenseTensor& x,
-                   const DenseTensor& y,
-                   int axis) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Divide<T>(dev_ctx, x, y, axis, &dense_out);
-  return dense_out;
-}
-
-template <typename T, typename ContextT>
-DenseTensor Multiply(const ContextT& dev_ctx,
-                     const DenseTensor& x,
-                     const DenseTensor& y,
-                     int axis) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
-  pten::DenseTensor dense_out(
-      pten::make_intrusive<paddle::experimental::SharedStorage>(
-          dev_ctx.GetPlace()),
-      std::move(out_meta));
-  Multiply<T>(dev_ctx, x, y, axis, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Conj(const ContextT& dev_ctx, const DenseTensor& x) {
   auto out_meta = UnchangedInferMeta(x.meta());
......
@@ -24,11 +24,17 @@ endif()
 # pten depends all pten kernel targets
 set_property(GLOBAL PROPERTY PTEN_KERNELS "")

-set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory)
-set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function)
+set(COMMON_KERNEL_DEPS dense_tensor kernel_context kernel_factory convert_utils)
+set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} eigen_function blas)
 set(COMMON_KERNEL_DEPS ${COMMON_KERNEL_DEPS} infermeta)
+set(MATH_KERNEL_DEPS ${COMMON_KERNEL_DEPS} cast_kernel copy_kernel pten_transpose_cpu)
+if(WITH_GPU OR WITH_ROCM)
+  set(MATH_KERNEL_DEPS ${MATH_KERNEL_DEPS} pten_transpose_gpu)
+endif()

 # auto build kernel targets by cmake
-register_kernels(DEPS ${COMMON_KERNEL_DEPS})
+register_kernels(EXCLUDES math_kernel DEPS ${COMMON_KERNEL_DEPS})
+kernel_library(math_kernel DEPS ${MATH_KERNEL_DEPS})

 copy_if_different(${kernel_declare_file} ${kernel_declare_file_final})
 // Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,138 +12,4 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/pten/kernels/cpu/math.h"
-
-#include "paddle/pten/api/ext/dispatch.h"
-#include "paddle/pten/kernels/hybird/cpu/elementwise.h"
-#include "paddle/pten/kernels/hybird/eigen/reduce.h"
-#include "paddle/pten/kernels/hybird/general/elementwise_functor.h"
-#include "paddle/pten/kernels/hybird/general/reduce_impl.h"
-
-// See Note [ Why still include the fluid headers? ]
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/platform/bfloat16.h"
-#include "paddle/fluid/platform/complex.h"
-
-namespace pten {
-
-template <typename T>
-void Mean(const CPUContext& dev_ctx,
-          const DenseTensor& x,
-          const std::vector<int64_t>& dims,
-          bool keep_dim,
-          bool reduce_all,
-          DenseTensor* out) {
-  auto out_dtype = x.dtype();
-  pten::general::Reduce<CPUContext, T, pten::eigen::MeanFunctor>(
-      dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
-}
-
-template <typename T>
-void Divide(const CPUContext& dev_ctx,
-            const DenseTensor& x,
-            const DenseTensor& y,
-            int axis,
-            DenseTensor* out) {
-  // allocate memory for out
-  out->mutable_data<T>();
-  if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
-    SameDimsElementwiseCompute<general::SameDimsDivideFunctor<CPUContext, T>>()(
-        dev_ctx, x, y, out);
-  } else {
-    auto x_dims = x.dims();
-    auto y_dims = y.dims();
-    if (x_dims.size() >= y_dims.size()) {
-      ElementwiseCompute<general::DivideFunctor<T>, T>(
-          dev_ctx, x, y, axis, general::DivideFunctor<T>(), out);
-    } else {
-      ElementwiseCompute<general::InverseDivideFunctor<T>, T>(
-          dev_ctx, x, y, axis, general::InverseDivideFunctor<T>(), out);
-    }
-  }
-}
-
-template <typename T>
-void Sum(const CPUContext& dev_ctx,
-         const DenseTensor& x,
-         const std::vector<int64_t>& dims,
-         bool keep_dim,
-         bool reduce_all,
-         DataType out_dtype,
-         DenseTensor* out) {
-  pten::general::Reduce<CPUContext, T, pten::eigen::SumFunctor>(
-      dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
-}
-
-// Create the definition of Add
-DEFINE_CPU_ELEMENTWISE_OP(Add)
-
-// Create the definition of Subtract
-DEFINE_CPU_ELEMENTWISE_OP(Subtract)
-
-// Create the definition of Multiply
-DEFINE_CPU_ELEMENTWISE_OP(Multiply)
-
-}  // namespace pten
+namespace pten {}  // namespace pten

-using complex64 = ::paddle::platform::complex<float>;
-using complex128 = ::paddle::platform::complex<double>;
-
-// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
-// using bfloat16 = ::paddle::platform::bfloat16;
-
-PT_REGISTER_KERNEL(mean, CPU, ALL_LAYOUT, pten::Mean, float, double, bool) {}
-PT_REGISTER_KERNEL(add,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Add,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   complex64,
-                   complex128) {}
-PT_REGISTER_KERNEL(subtract,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Subtract,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   complex64,
-                   complex128) {}
-PT_REGISTER_KERNEL(divide,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Divide,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   complex64,
-                   complex128) {}
-PT_REGISTER_KERNEL(multiply,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Multiply,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   bool,
-                   complex64,
-                   complex128) {}
-PT_REGISTER_KERNEL(sum,
-                   CPU,
-                   ALL_LAYOUT,
-                   pten::Sum,
-                   bool,
-                   float,
-                   double,
-                   paddle::platform::float16,
-                   int,
-                   int64_t,
-                   complex64,
-                   complex128) {
-  kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
-}
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include "paddle/pten/backends/cpu/cpu_context.h"
-#include "paddle/pten/common/scalar.h"
-#include "paddle/pten/core/dense_tensor.h"
-#include "paddle/pten/core/kernel_registry.h"
-
-namespace pten {
-
-template <typename T>
-void Mean(const CPUContext& dev_ctx,
-          const DenseTensor& x,
-          const std::vector<int64_t>& dims,
-          bool keep_dim,
-          bool reduce_all,
-          DenseTensor* out);
-
-template <typename T>
-void Add(const CPUContext& dev_ctx,
-         const DenseTensor& x,
-         const DenseTensor& y,
-         int axis,
-         DenseTensor* out);
-
-template <typename T>
-void Subtract(const CPUContext& dev_ctx,
-              const DenseTensor& x,
-              const DenseTensor& y,
-              int axis,
-              DenseTensor* out);
-
-template <typename T>
-void Divide(const CPUContext& dev_ctx,
-            const DenseTensor& x,
-            const DenseTensor& y,
-            int axis,
-            DenseTensor* out);
-
-template <typename T>
-void Multiply(const CPUContext& dev_ctx,
-              const DenseTensor& x,
-              const DenseTensor& y,
-              int axis,
-              DenseTensor* out);
-
-template <typename T>
-void Sum(const CPUContext& dev_ctx,
-         const DenseTensor& x,
-         const std::vector<int64_t>& dims,
-         bool keep_dim,
-         bool reduce_all,
-         DataType out_dtype,
-         DenseTensor* out);
-
-}  // namespace pten
-
-#define DEFINE_CPU_ELEMENTWISE_OP(name) \
-  template <typename T> \
-  void name(const CPUContext& dev_ctx, \
-            const DenseTensor& x, \
-            const DenseTensor& y, \
-            int axis, \
-            DenseTensor* out) { \
-    out->mutable_data<T>(); \
-    if (x.dims() == y.dims()) { \
-      SameDimsElementwiseCompute< \
-          general::SameDims##name##Functor<CPUContext, T>>()( \
-          dev_ctx, x, y, out); \
-    } else { \
-      auto x_dims = x.dims(); \
-      auto y_dims = y.dims(); \
-      if (x_dims.size() >= y_dims.size()) { \
-        ElementwiseCompute<general::name##Functor<T>, T>( \
-            dev_ctx, x, y, axis, general::name##Functor<T>(), out); \
-      } else { \
-        ElementwiseCompute<general::Inverse##name##Functor<T>, T>( \
-            dev_ctx, x, y, axis, general::Inverse##name##Functor<T>(), out); \
-      } \
-    } \
-  }
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/math_kernel.h"
+
+#include "paddle/pten/api/ext/dispatch.h"
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/kernels/hybird/cpu/elementwise.h"
+#include "paddle/pten/kernels/hybird/eigen/reduce.h"
+#include "paddle/pten/kernels/hybird/general/elementwise_functor.h"
+#include "paddle/pten/kernels/hybird/general/reduce_impl.h"
+
+// See Note [ Why still include the fluid headers? ]
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/platform/bfloat16.h"
+#include "paddle/fluid/platform/complex.h"
+
+namespace pten {
+
+#define DEFINE_CPU_ELEMENTWISE_OP(name) \
+  template <typename T, typename Context> \
+  void name##Kernel(const Context& dev_ctx, \
+                    const DenseTensor& x, \
+                    const DenseTensor& y, \
+                    int axis, \
+                    DenseTensor* out) { \
+    out->mutable_data<T>(); \
+    if (x.dims() == y.dims()) { \
+      SameDimsElementwiseCompute< \
+          general::SameDims##name##Functor<CPUContext, T>>()( \
+          dev_ctx, x, y, out); \
+    } else { \
+      auto x_dims = x.dims(); \
+      auto y_dims = y.dims(); \
+      if (x_dims.size() >= y_dims.size()) { \
+        ElementwiseCompute<general::name##Functor<T>, T>( \
+            dev_ctx, x, y, axis, general::name##Functor<T>(), out); \
+      } else { \
+        ElementwiseCompute<general::Inverse##name##Functor<T>, T>( \
+            dev_ctx, x, y, axis, general::Inverse##name##Functor<T>(), out); \
+      } \
+    } \
+  }
+
+template <typename T, typename Context>
+void Mean(const Context& dev_ctx,
+          const DenseTensor& x,
+          const std::vector<int64_t>& dims,
+          bool keep_dim,
+          bool reduce_all,
+          DenseTensor* out) {
+  auto out_dtype = x.dtype();
+  pten::general::Reduce<CPUContext, T, pten::eigen::MeanFunctor>(
+      dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
+}
+
+template <typename T, typename Context>
+void DivideKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& y,
+                  int axis,
+                  DenseTensor* out) {
+  // allocate memory for out
+  out->mutable_data<T>();
+  if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
+    SameDimsElementwiseCompute<general::SameDimsDivideFunctor<CPUContext, T>>()(
+        dev_ctx, x, y, out);
+  } else {
+    auto x_dims = x.dims();
+    auto y_dims = y.dims();
+    if (x_dims.size() >= y_dims.size()) {
+      ElementwiseCompute<general::DivideFunctor<T>, T>(
+          dev_ctx, x, y, axis, general::DivideFunctor<T>(), out);
+    } else {
+      ElementwiseCompute<general::InverseDivideFunctor<T>, T>(
+          dev_ctx, x, y, axis, general::InverseDivideFunctor<T>(), out);
+    }
+  }
+}
+
+template <typename T, typename Context>
+void Sum(const Context& dev_ctx,
+         const DenseTensor& x,
+         const std::vector<int64_t>& dims,
+         bool keep_dim,
+         bool reduce_all,
+         DataType out_dtype,
+         DenseTensor* out) {
+  pten::general::Reduce<CPUContext, T, pten::eigen::SumFunctor>(
+      dev_ctx, x, reduce_all, dims, keep_dim, out_dtype, out);
+}
+
+// Create the definition of Add
+DEFINE_CPU_ELEMENTWISE_OP(Add)
+
+// Create the definition of Subtract
+DEFINE_CPU_ELEMENTWISE_OP(Subtract)
+
+// Create the definition of Multiply
+DEFINE_CPU_ELEMENTWISE_OP(Multiply)
+
+}  // namespace pten
+
+using complex64 = ::paddle::platform::complex<float>;
+using complex128 = ::paddle::platform::complex<double>;
+
+// NOTE(chenweihang): using bfloat16 will cause redefine with xpu bfloat16
+// using bfloat16 = ::paddle::platform::bfloat16;
+
+PT_REGISTER_CTX_KERNEL(mean, CPU, ALL_LAYOUT, pten::Mean, float, double, bool) {
+}
+PT_REGISTER_CTX_KERNEL(add,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::AddKernel,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       complex64,
+                       complex128) {}
+PT_REGISTER_CTX_KERNEL(subtract,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::SubtractKernel,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       complex64,
+                       complex128) {}
+PT_REGISTER_CTX_KERNEL(divide,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::DivideKernel,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       complex64,
+                       complex128) {}
+PT_REGISTER_CTX_KERNEL(multiply,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::MultiplyKernel,
+                       float,
+                       double,
+                       int,
+                       int64_t,
+                       bool,
+                       complex64,
+                       complex128) {}
+PT_REGISTER_CTX_KERNEL(sum,
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Sum,
+                       bool,
+                       float,
+                       double,
+                       paddle::platform::float16,
+                       int,
+                       int64_t,
+                       complex64,
+                       complex128) {
+  kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
+}
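For readers tracing the new macro: DEFINE_CPU_ELEMENTWISE_OP(Add) expands to the kernel below (a hand expansion for name = Add; the functors are the ones pulled in from paddle/pten/kernels/hybird/general/elementwise_functor.h above):

    template <typename T, typename Context>
    void AddKernel(const Context& dev_ctx,
                   const DenseTensor& x,
                   const DenseTensor& y,
                   int axis,
                   DenseTensor* out) {
      out->mutable_data<T>();
      if (x.dims() == y.dims()) {
        // Fast path: shapes match exactly, no broadcasting needed.
        SameDimsElementwiseCompute<
            general::SameDimsAddFunctor<CPUContext, T>>()(dev_ctx, x, y, out);
      } else {
        auto x_dims = x.dims();
        auto y_dims = y.dims();
        if (x_dims.size() >= y_dims.size()) {
          ElementwiseCompute<general::AddFunctor<T>, T>(
              dev_ctx, x, y, axis, general::AddFunctor<T>(), out);
        } else {
          // When y has the higher rank, the Inverse functor swaps the roles.
          ElementwiseCompute<general::InverseAddFunctor<T>, T>(
              dev_ctx, x, y, axis, general::InverseAddFunctor<T>(), out);
        }
      }
    }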
-if(WITH_GPU)
-  nv_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu cast_kernel copy_kernel)
-elseif(WITH_ROCM)
-  hip_library(math_gpu SRCS math.cu DEPS eigen_function dense_tensor convert_utils kernel_context kernel_factory pten_transpose_gpu cast_kernel copy_kernel)
-endif()
-/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-// CUDA and HIP use same api
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-
-#include "paddle/pten/backends/gpu/gpu_context.h"
-#include "paddle/pten/common/scalar.h"
-#include "paddle/pten/core/dense_tensor.h"
-
-namespace pten {
-
-template <typename T>
-void Mean(const GPUContext& dev_ctx,
-          const DenseTensor& x,
-          const std::vector<int64_t>& dims,
-          bool keep_dim,
-          bool reduce_all,
-          DenseTensor* out);
-
-template <typename T>
-void Add(const GPUContext& dev_ctx,
-         const DenseTensor& x,
-         const DenseTensor& y,
-         int axis,
-         DenseTensor* out);
-
-template <typename T>
-void Subtract(const GPUContext& dev_ctx,
-              const DenseTensor& x,
-              const DenseTensor& y,
-              int axis,
-              DenseTensor* out);
-
-template <typename T>
-void Divide(const GPUContext& dev_ctx,
-            const DenseTensor& x,
-            const DenseTensor& y,
-            int axis,
-            DenseTensor* out);
-
-template <typename T>
-void Multiply(const GPUContext& dev_ctx,
-              const DenseTensor& x,
-              const DenseTensor& y,
-              int axis,
-              DenseTensor* out);
-
-template <typename T>
-void Sum(const GPUContext& dev_ctx,
-         const DenseTensor& x,
-         const std::vector<int64_t>& dims,
-         bool keep_dim,
-         bool reduce_all,
-         DataType out_dtype,
-         DenseTensor* out);
-
-}  // namespace pten
-
-#define DEFINE_CUDA_ELEMENTWISE_OP(name) \
-  template <typename T> \
-  void name(const GPUContext& dev_ctx, \
-            const DenseTensor& x, \
-            const DenseTensor& y, \
-            int axis, \
-            DenseTensor* out) { \
-    std::vector<const DenseTensor*> inputs; \
-    std::vector<DenseTensor*> outputs; \
-    inputs.emplace_back(&x); \
-    inputs.emplace_back(&y); \
-    outputs.emplace_back(out); \
-    out->mutable_data<T>(); \
-    LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( \
-        dev_ctx, inputs, &outputs, axis, general::name##Functor<T>()); \
-  }
-
-#endif
@@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/pten/kernels/gpu/math.h"
+#include "paddle/pten/kernels/math_kernel.h"
+
+#include "paddle/pten/backends/gpu/gpu_context.h"
 #include "paddle/pten/kernels/hybird/cuda/elementwise/elementwise.h"
 #include "paddle/pten/kernels/hybird/cuda/reduce/reduce.h"
 #include "paddle/pten/kernels/hybird/general/elementwise_functor.h"
@@ -38,6 +39,23 @@ namespace kps = paddle::operators::kernel_primitives;
 namespace pten {

+#define DEFINE_CUDA_ELEMENTWISE_OP(name) \
+  template <typename T, typename Context> \
+  void name##Kernel(const Context& dev_ctx, \
+                    const DenseTensor& x, \
+                    const DenseTensor& y, \
+                    int axis, \
+                    DenseTensor* out) { \
+    std::vector<const DenseTensor*> inputs; \
+    std::vector<DenseTensor*> outputs; \
+    inputs.emplace_back(&x); \
+    inputs.emplace_back(&y); \
+    outputs.emplace_back(out); \
+    out->mutable_data<T>(); \
+    LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>( \
+        dev_ctx, inputs, &outputs, axis, general::name##Functor<T>()); \
+  }
+
 /**
  * Util Functors
  */
@@ -57,8 +75,8 @@ struct DivideFunctor {
  * Kernels
  */

-template <typename T>
-void Mean(const GPUContext& dev_ctx,
+template <typename T, typename Context>
+void Mean(const Context& dev_ctx,
           const DenseTensor& x,
           const std::vector<int64_t>& dims,
           bool keep_dim,
@@ -78,8 +96,8 @@ DEFINE_CUDA_ELEMENTWISE_OP(Multiply)
 // Create the definition of Divide
 DEFINE_CUDA_ELEMENTWISE_OP(Divide)

-template <typename T>
-void Sum(const GPUContext& dev_ctx,
+template <typename T, typename Context>
+void Sum(const Context& dev_ctx,
          const DenseTensor& x,
          const std::vector<int64_t>& dims,
          bool keep_dim,
@@ -96,64 +114,64 @@ using float16 = paddle::platform::float16;
 using complex64 = ::paddle::platform::complex<float>;
 using complex128 = ::paddle::platform::complex<double>;

-PT_REGISTER_KERNEL(
+PT_REGISTER_CTX_KERNEL(
     mean, GPU, ALL_LAYOUT, pten::Mean, float, double, bool, float16) {}
-PT_REGISTER_KERNEL(add,
+PT_REGISTER_CTX_KERNEL(add,
                    GPU,
                    ALL_LAYOUT,
-                   pten::Add,
+                   pten::AddKernel,
                    float,
                    double,
                    int,
                    int64_t,
                    float16,
                    complex64,
                    complex128) {}
-PT_REGISTER_KERNEL(subtract,
+PT_REGISTER_CTX_KERNEL(subtract,
                    GPU,
                    ALL_LAYOUT,
-                   pten::Subtract,
+                   pten::SubtractKernel,
                    float,
                    double,
                    int,
                    int64_t,
                    float16,
                    complex64,
                    complex128) {}
-PT_REGISTER_KERNEL(divide,
+PT_REGISTER_CTX_KERNEL(divide,
                    GPU,
                    ALL_LAYOUT,
-                   pten::Divide,
+                   pten::DivideKernel,
                    float,
                    double,
                    int,
                    int64_t,
                    float16,
                    complex64,
                    complex128) {}
-PT_REGISTER_KERNEL(multiply,
+PT_REGISTER_CTX_KERNEL(multiply,
                    GPU,
                    ALL_LAYOUT,
-                   pten::Multiply,
+                   pten::MultiplyKernel,
                    float,
                    double,
                    int,
                    int64_t,
                    bool,
                    float16,
                    complex64,
                    complex128) {}
-PT_REGISTER_KERNEL(sum,
+PT_REGISTER_CTX_KERNEL(sum,
                    GPU,
                    ALL_LAYOUT,
                    pten::Sum,
                    bool,
                    float,
                    double,
                    float16,
                    int,
                    int64_t,
                    complex64,
                    complex128) {
   kernel->OutputAt(0).SetDataType(paddle::experimental::DataType::UNDEFINED);
 }
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/pten/api/lib/utils/storage.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/include/infermeta.h"
+
+namespace pten {
+
+template <typename T, typename Context>
+void Mean(const Context& dev_ctx,
+          const DenseTensor& x,
+          const std::vector<int64_t>& dims,
+          bool keep_dim,
+          bool reduce_all,
+          DenseTensor* out);
+
+template <typename T, typename Context>
+void AddKernel(const Context& dev_ctx,
+               const DenseTensor& x,
+               const DenseTensor& y,
+               int axis,
+               DenseTensor* out);
+
+template <typename T, typename Context>
+void SubtractKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& y,
+                    int axis,
+                    DenseTensor* out);
+
+template <typename T, typename Context>
+void DivideKernel(const Context& dev_ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& y,
+                  int axis,
+                  DenseTensor* out);
+
+template <typename T, typename Context>
+void MultiplyKernel(const Context& dev_ctx,
+                    const DenseTensor& x,
+                    const DenseTensor& y,
+                    int axis,
+                    DenseTensor* out);
+
+template <typename T, typename Context>
+void Sum(const Context& dev_ctx,
+         const DenseTensor& x,
+         const std::vector<int64_t>& dims,
+         bool keep_dim,
+         bool reduce_all,
+         DataType out_dtype,
+         DenseTensor* out);
+
+template <typename T, typename ContextT>
+DenseTensor Add(const ContextT& dev_ctx,
+                const DenseTensor& x,
+                const DenseTensor& y,
+                int axis) {
+  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
+  AddKernel<T, ContextT>(dev_ctx, x, y, axis, &dense_out);
+  return dense_out;
+}
+
+template <typename T, typename ContextT>
+DenseTensor Subtract(const ContextT& dev_ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& y,
+                     int axis) {
+  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
+  SubtractKernel<T, ContextT>(dev_ctx, x, y, axis, &dense_out);
+  return dense_out;
+}
+
+template <typename T, typename ContextT>
+DenseTensor Divide(const ContextT& dev_ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& y,
+                   int axis) {
+  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
+  DivideKernel<T, ContextT>(dev_ctx, x, y, axis, &dense_out);
+  return dense_out;
+}
+
+template <typename T, typename ContextT>
+DenseTensor Multiply(const ContextT& dev_ctx,
+                     const DenseTensor& x,
+                     const DenseTensor& y,
+                     int axis) {
+  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), axis);
+  pten::DenseTensor dense_out(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
+  MultiplyKernel<T, ContextT>(dev_ctx, x, y, axis, &dense_out);
+  return dense_out;
+}
+
+}  // namespace pten
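The value-returning wrappers at the bottom of this header keep the old convenience API on top of the renamed kernels. A minimal usage sketch (assumptions: dev_ctx is a pten::CPUContext and x, y are float DenseTensors that already exist; axis = -1 picks the default broadcast axis, as in the cholesky_solve call above):

    // Infers the output shape/dtype via ElementwiseInferMeta, allocates the
    // result through SharedStorage on dev_ctx's place, then runs AddKernel.
    pten::DenseTensor out =
        pten::Add<float, pten::CPUContext>(dev_ctx, x, y, /*axis=*/-1);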
@@ -15,7 +15,7 @@ limitations under the License. */
 #include <gtest/gtest.h>
 #include <memory>

-#include "paddle/pten/include/math.h"
+#include "paddle/pten/kernels/math_kernel.h"

 #include "paddle/pten/api/lib/utils/allocator.h"
 #include "paddle/pten/core/dense_tensor.h"
......