未验证 提交 5cb2c741 编写于 作者: A Aurelius84 提交者: GitHub

Register additional op data types for pad, expand_as, et al. (#21718)

* register additional op data types test=develop

* fix registration bug in isfinite op test=develop

* remove int and int64_t from the pad2d grad kernel registration test=develop
上级 8439384e
......@@ -93,4 +93,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(argsort,
ops::ArgsortKernel<paddle::platform::CPUPlace, float>,
ops::ArgsortKernel<paddle::platform::CPUPlace, double>);
ops::ArgsortKernel<paddle::platform::CPUPlace, double>,
ops::ArgsortKernel<paddle::platform::CPUPlace, int>,
ops::ArgsortKernel<paddle::platform::CPUPlace, int64_t>);
......@@ -240,4 +240,6 @@ class ArgsortOpCUDAKernel : public framework::OpKernel<T> {
REGISTER_OP_CUDA_KERNEL(
argsort, paddle::operators::ArgsortOpCUDAKernel<float>,
paddle::operators::ArgsortOpCUDAKernel<double>,
paddle::operators::ArgsortOpCUDAKernel<int>,
paddle::operators::ArgsortOpCUDAKernel<int64_t>,
paddle::operators::ArgsortOpCUDAKernel<paddle::platform::float16>);
......@@ -81,4 +81,5 @@ REGISTER_OPERATOR(cumsum, ops::CumOp, ops::CumsumOpMaker,
ops::CumsumGradMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(cumsum, ops::CumKernel<CPU, ops::CumsumFunctor<float>>,
ops::CumKernel<CPU, ops::CumsumFunctor<double>>,
ops::CumKernel<CPU, ops::CumsumFunctor<int>>);
ops::CumKernel<CPU, ops::CumsumFunctor<int>>,
ops::CumKernel<CPU, ops::CumsumFunctor<int64_t>>);
......@@ -19,4 +19,5 @@ using CUDA = paddle::platform::CUDADeviceContext;
REGISTER_OP_CUDA_KERNEL(cumsum, ops::CumKernel<CUDA, ops::CumsumFunctor<float>>,
ops::CumKernel<CUDA, ops::CumsumFunctor<double>>,
ops::CumKernel<CUDA, ops::CumsumFunctor<int>>);
ops::CumKernel<CUDA, ops::CumsumFunctor<int>>,
ops::CumKernel<CUDA, ops::CumsumFunctor<int64_t>>);
......@@ -130,8 +130,11 @@ REGISTER_OP_CPU_KERNEL(
expand_as, ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, double>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ExpandAsKernel<paddle::platform::CPUDeviceContext, bool>);
REGISTER_OP_CPU_KERNEL(
expand_as_grad,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, int64_t>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::ExpandAsGradKernel<paddle::platform::CPUDeviceContext, double>);
......@@ -15,8 +15,11 @@ REGISTER_OP_CUDA_KERNEL(
expand_as, ops::ExpandAsKernel<paddle::platform::CUDADeviceContext, float>,
ops::ExpandAsKernel<paddle::platform::CUDADeviceContext, double>,
ops::ExpandAsKernel<paddle::platform::CUDADeviceContext, int>,
ops::ExpandAsKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ExpandAsKernel<paddle::platform::CUDADeviceContext, bool>);
REGISTER_OP_CUDA_KERNEL(
expand_as_grad,
ops::ExpandAsGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::ExpandAsGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::ExpandAsGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::ExpandAsGradKernel<paddle::platform::CUDADeviceContext, double>);
......@@ -104,6 +104,8 @@ namespace ops = paddle::operators;
REGISTER_OP_CPU_KERNEL( \
op_type, ops::OverflowKernel<paddle::platform::CPUDeviceContext, int, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CPUDeviceContext, int64_t, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CPUDeviceContext, float, \
ops::functor>, \
ops::OverflowKernel<paddle::platform::CPUDeviceContext, double, \
......
......@@ -661,5 +661,8 @@ REGISTER_OPERATOR(pad2d, ops::Pad2dOp, ops::Pad2dOpMaker,
ops::Pad2dOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(pad2d_grad, ops::Pad2dOpGrad,
ops::Pad2dOpGradNoNeedBufferVarsInference);
REGISTER_OP_CPU_KERNEL(pad2d, ops::Pad2dCPUKernel<float>);
REGISTER_OP_CPU_KERNEL(pad2d_grad, ops::Pad2dGradCPUKernel<float>);
REGISTER_OP_CPU_KERNEL(pad2d, ops::Pad2dCPUKernel<float>,
ops::Pad2dCPUKernel<double>, ops::Pad2dCPUKernel<int>,
ops::Pad2dCPUKernel<int64_t>);
REGISTER_OP_CPU_KERNEL(pad2d_grad, ops::Pad2dGradCPUKernel<float>,
ops::Pad2dGradCPUKernel<double>);
......@@ -459,5 +459,8 @@ class Pad2dGradCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(pad2d, ops::Pad2dCUDAKernel<float>);
REGISTER_OP_CUDA_KERNEL(pad2d_grad, ops::Pad2dGradCUDAKernel<float>);
REGISTER_OP_CUDA_KERNEL(pad2d, ops::Pad2dCUDAKernel<float>,
ops::Pad2dCUDAKernel<double>, ops::Pad2dCUDAKernel<int>,
ops::Pad2dCUDAKernel<int64_t>);
REGISTER_OP_CUDA_KERNEL(pad2d_grad, ops::Pad2dGradCUDAKernel<float>,
ops::Pad2dGradCUDAKernel<double>);
......@@ -224,8 +224,13 @@ REGISTER_OPERATOR(pad_constant_like_grad, ops::PadConstantLikeOpGrad);
REGISTER_OP_CPU_KERNEL(
pad_constant_like,
ops::PadConstantLikeKernel<paddle::platform::CPUDeviceContext, float>,
ops::PadConstantLikeKernel<paddle::platform::CPUDeviceContext, double>);
ops::PadConstantLikeKernel<paddle::platform::CPUDeviceContext, double>,
ops::PadConstantLikeKernel<paddle::platform::CPUDeviceContext, int>,
ops::PadConstantLikeKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
pad_constant_like_grad,
ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext, double>);
ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext,
int64_t>);
......@@ -17,9 +17,14 @@ namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
pad_constant_like,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, float>,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, double>);
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, double>,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int>,
ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
pad_constant_like_grad,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
int64_t>,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
double>);
......@@ -147,6 +147,10 @@ REGISTER_OPERATOR(pad, ops::PadOp, ops::PadOpMaker,
ops::PadOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(pad_grad, ops::PadOpGrad);
REGISTER_OP_CPU_KERNEL(
pad, ops::PadKernel<paddle::platform::CPUDeviceContext, float>);
pad, ops::PadKernel<paddle::platform::CPUDeviceContext, float>,
ops::PadKernel<paddle::platform::CPUDeviceContext, double>,
ops::PadKernel<paddle::platform::CPUDeviceContext, int>,
ops::PadKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
pad_grad, ops::PadGradKernel<paddle::platform::CPUDeviceContext, float>);
pad_grad, ops::PadGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::PadGradKernel<paddle::platform::CPUDeviceContext, double>);
......@@ -18,6 +18,8 @@ namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(
pad, ops::PadKernel<paddle::platform::CUDADeviceContext, double>,
ops::PadKernel<paddle::platform::CUDADeviceContext, float>,
ops::PadKernel<paddle::platform::CUDADeviceContext, int>,
ops::PadKernel<paddle::platform::CUDADeviceContext, int64_t>,
ops::PadKernel<paddle::platform::CUDADeviceContext, plat::float16>);
REGISTER_OP_CUDA_KERNEL(
pad_grad, ops::PadGradKernel<paddle::platform::CUDADeviceContext, double>,
......
......@@ -86,4 +86,5 @@ REGISTER_OP_CPU_KERNEL(
save_combine,
ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, double>,
ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, int>);
ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::SaveCombineOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
......@@ -20,4 +20,5 @@ REGISTER_OP_CUDA_KERNEL(
save_combine,
ops::SaveCombineOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::SaveCombineOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::SaveCombineOpKernel<paddle::platform::CUDADeviceContext, int>);
ops::SaveCombineOpKernel<paddle::platform::CUDADeviceContext, int>,
ops::SaveCombineOpKernel<paddle::platform::CUDADeviceContext, int64_t>);
......@@ -146,5 +146,10 @@ REGISTER_OPERATOR(scatter, ops::ScatterOp, ops::ScatterOpMaker,
REGISTER_OPERATOR(scatter_grad, ops::ScatterGradOp,
ops::ScatterGradNoNeedBufferVarsInference,
ops::ScatterGradInplaceInferer);
REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>);
REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>);
REGISTER_OP_CPU_KERNEL(scatter, ops::ScatterOpKernel<float>,
ops::ScatterOpKernel<double>, ops::ScatterOpKernel<int>,
ops::ScatterOpKernel<int64_t>);
REGISTER_OP_CPU_KERNEL(scatter_grad, ops::ScatterGradientOpKernel<float>,
ops::ScatterGradientOpKernel<double>,
ops::ScatterGradientOpKernel<int>,
ops::ScatterGradientOpKernel<int64_t>);
......@@ -94,5 +94,9 @@ class ScatterGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(scatter, ops::ScatterOpCUDAKernel<float>);
REGISTER_OP_CUDA_KERNEL(scatter_grad, ops::ScatterGradOpCUDAKernel<float>);
REGISTER_OP_CUDA_KERNEL(scatter, ops::ScatterOpCUDAKernel<float>,
ops::ScatterOpCUDAKernel<double>,
ops::ScatterOpCUDAKernel<int>,
ops::ScatterOpCUDAKernel<int64_t>);
REGISTER_OP_CUDA_KERNEL(scatter_grad, ops::ScatterGradOpCUDAKernel<float>,
ops::ScatterGradOpCUDAKernel<double>);
......@@ -126,7 +126,7 @@ REGISTER_OPERATOR(sequence_concat, op::SequenceConcatOp, op::SeqConcatOpMaker,
template <typename T>
using Kernel = op::SeqConcatKernel<paddle::platform::CPUDeviceContext, T>;
REGISTER_OP_CPU_KERNEL(sequence_concat, Kernel<float>, Kernel<double>,
Kernel<int64_t>);
Kernel<int>, Kernel<int64_t>);
REGISTER_OPERATOR(sequence_concat_grad, op::SeqConcatGradOp,
op::SeqConcatGradNoNeedBufferVarsInference);
......@@ -134,4 +134,5 @@ template <typename T>
using GradKernel =
op::SeqConcatGradKernel<paddle::platform::CPUDeviceContext, T>;
REGISTER_OP_CPU_KERNEL(sequence_concat_grad, GradKernel<float>,
GradKernel<double>, GradKernel<int64_t>);
GradKernel<double>, GradKernel<int>,
GradKernel<int64_t>);
......@@ -17,10 +17,12 @@
template <typename T>
using Kernel =
paddle::operators::SeqConcatKernel<paddle::platform::CUDADeviceContext, T>;
REGISTER_OP_CUDA_KERNEL(sequence_concat, Kernel<float>, Kernel<double>);
REGISTER_OP_CUDA_KERNEL(sequence_concat, Kernel<float>, Kernel<double>,
Kernel<int>, Kernel<int64_t>);
template <typename T>
using GradKernel =
paddle::operators::SeqConcatGradKernel<paddle::platform::CUDADeviceContext,
T>;
REGISTER_OP_CUDA_KERNEL(sequence_concat_grad, GradKernel<float>,
GradKernel<double>);
GradKernel<double>, GradKernel<int>,
GradKernel<int64_t>);
......@@ -150,7 +150,14 @@ REGISTER_OPERATOR(sequence_slice_grad, ops::SequenceSliceGradOp,
ops::SequenceSliceGradNoNeedBufferVarsInference);
REGISTER_OP_CPU_KERNEL(
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::CPUDeviceContext, float>);
ops::SequenceSliceOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::SequenceSliceOpKernel<paddle::platform::CPUDeviceContext, double>,
ops::SequenceSliceOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::SequenceSliceOpKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::CPUDeviceContext, float>);
ops::SequenceSliceGradOpKernel<paddle::platform::CPUDeviceContext, float>,
ops::SequenceSliceGradOpKernel<paddle::platform::CPUDeviceContext, double>,
ops::SequenceSliceGradOpKernel<paddle::platform::CPUDeviceContext, int>,
ops::SequenceSliceGradOpKernel<paddle::platform::CPUDeviceContext,
int64_t>);
......@@ -17,7 +17,14 @@ limitations under the License. */
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
sequence_slice,
ops::SequenceSliceOpKernel<paddle::platform::CUDADeviceContext, float>);
ops::SequenceSliceOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::SequenceSliceOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::SequenceSliceOpKernel<paddle::platform::CUDADeviceContext, int>,
ops::SequenceSliceOpKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
sequence_slice_grad,
ops::SequenceSliceGradOpKernel<paddle::platform::CUDADeviceContext, float>);
ops::SequenceSliceGradOpKernel<paddle::platform::CUDADeviceContext, float>,
ops::SequenceSliceGradOpKernel<paddle::platform::CUDADeviceContext, double>,
ops::SequenceSliceGradOpKernel<paddle::platform::CUDADeviceContext, int>,
ops::SequenceSliceGradOpKernel<paddle::platform::CUDADeviceContext,
int64_t>);
......@@ -187,9 +187,11 @@ REGISTER_OP_CPU_KERNEL(
space_to_depth,
ops::SpaceToDepthKernel<paddle::platform::CPUDeviceContext, float>,
ops::SpaceToDepthKernel<paddle::platform::CPUDeviceContext, double>,
ops::SpaceToDepthKernel<paddle::platform::CPUDeviceContext, int>,
ops::SpaceToDepthKernel<paddle::platform::CPUDeviceContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
space_to_depth_grad,
ops::SpaceToDepthGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::SpaceToDepthGradKernel<paddle::platform::CPUDeviceContext, double>,
ops::SpaceToDepthGradKernel<paddle::platform::CPUDeviceContext, int>,
ops::SpaceToDepthGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
......@@ -21,10 +21,12 @@ REGISTER_OP_CUDA_KERNEL(
space_to_depth,
ops::SpaceToDepthKernel<paddle::platform::CUDADeviceContext, float>,
ops::SpaceToDepthKernel<paddle::platform::CUDADeviceContext, double>,
ops::SpaceToDepthKernel<paddle::platform::CUDADeviceContext, int>,
ops::SpaceToDepthKernel<paddle::platform::CUDADeviceContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
space_to_depth_grad,
ops::SpaceToDepthGradKernel<paddle::platform::CUDADeviceContext, float>,
ops::SpaceToDepthGradKernel<paddle::platform::CUDADeviceContext, double>,
ops::SpaceToDepthGradKernel<paddle::platform::CUDADeviceContext, int>,
ops::SpaceToDepthGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
......@@ -99,4 +99,6 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(top_k,
ops::TopkKernel<paddle::platform::CPUPlace, float>,
ops::TopkKernel<paddle::platform::CPUPlace, double>);
ops::TopkKernel<paddle::platform::CPUPlace, double>,
ops::TopkKernel<paddle::platform::CPUPlace, int>,
ops::TopkKernel<paddle::platform::CPUPlace, int64_t>);
......@@ -549,4 +549,6 @@ class TopkOpCUDAKernel : public framework::OpKernel<T> {
REGISTER_OP_CUDA_KERNEL(
top_k, paddle::operators::TopkOpCUDAKernel<float>,
paddle::operators::TopkOpCUDAKernel<double>,
paddle::operators::TopkOpCUDAKernel<int>,
paddle::operators::TopkOpCUDAKernel<int64_t>,
paddle::operators::TopkOpCUDAKernel<paddle::platform::float16>);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册