From aa35331f11b8ecb1d9c285fafdb9ed239a4d98c9 Mon Sep 17 00:00:00 2001 From: huangjiyi <43315610+huangjiyi@users.noreply.github.com> Date: Mon, 10 Apr 2023 11:39:55 +0800 Subject: [PATCH] register fluid kerenls to phi [part 7] (#52577) * update * fix bug * fix ci-windows-openblas * fix test_partial_sum_op * fix codestyle --- .../collective/partial_allgather_op.cc | 15 +++-- .../collective/partial_allgather_op.cu.cc | 20 ++++--- .../collective/partial_allgather_op.h | 2 +- .../operators/collective/partial_recv_op.cc | 15 +++-- .../collective/partial_recv_op.cu.cc | 20 ++++--- .../operators/collective/partial_recv_op.h | 2 +- .../operators/collective/partial_send_op.cc | 15 +++-- .../collective/partial_send_op.cu.cc | 20 ++++--- .../operators/collective/partial_send_op.h | 2 +- .../detection/polygon_box_transform_op.cc | 13 +++-- .../detection/polygon_box_transform_op.cu | 13 +++-- .../operators/metrics/precision_recall_op.cc | 10 ++-- .../operators/metrics/precision_recall_op.h | 2 +- paddle/fluid/operators/nccl/nccl_op.cu.cc | 15 +++-- .../fluid/operators/nccl/nccl_op_test.cu.cc | 9 ++- paddle/fluid/operators/nce_op.cc | 11 ++-- paddle/fluid/operators/nce_op.h | 4 +- paddle/fluid/operators/nop_op.cc | 10 ++-- paddle/fluid/operators/number_count_op.cc | 7 +-- paddle/fluid/operators/number_count_op.cu | 5 +- paddle/fluid/operators/number_count_op.h | 2 +- .../optimizers/proximal_adagrad_op.cc | 4 +- .../optimizers/proximal_adagrad_op.cu | 4 +- .../optimizers/proximal_adagrad_op.h | 2 +- .../operators/optimizers/proximal_gd_op.cc | 5 +- .../operators/optimizers/proximal_gd_op.cu | 4 +- .../operators/optimizers/proximal_gd_op.h | 2 +- paddle/fluid/operators/pad2d_op.cc | 17 +++--- paddle/fluid/operators/pad2d_op.cu | 30 ++++++---- .../fluid/operators/pad_constant_like_op.cc | 58 +++++++++++-------- paddle/fluid/operators/pad_constant_like_op.h | 4 +- paddle/fluid/operators/partial_concat_op.cc | 27 +++++---- paddle/fluid/operators/partial_concat_op.cu | 36 +++++++----- paddle/fluid/operators/partial_concat_op.h | 4 +- paddle/fluid/operators/partial_sum_op.cc | 27 +++++---- paddle/fluid/operators/partial_sum_op.cu | 19 +----- paddle/fluid/operators/partial_sum_op.h | 4 +- .../operators/positive_negative_pair_op.cc | 11 ++-- .../operators/positive_negative_pair_op.h | 2 +- paddle/fluid/operators/prroi_pool_op.cc | 27 +++++---- paddle/fluid/operators/prroi_pool_op.cu | 19 +++--- paddle/fluid/operators/prroi_pool_op.h | 4 +- .../operators/prune_gate_by_capacity_op.cc | 10 ++-- .../operators/prune_gate_by_capacity_op.cu | 10 ++-- .../operators/prune_gate_by_capacity_op.h | 2 +- .../operators/pull_box_extended_sparse_op.cc | 19 +++--- .../operators/pull_box_extended_sparse_op.cu | 23 +++++--- .../operators/pull_box_extended_sparse_op.h | 4 +- paddle/fluid/operators/pull_box_sparse_op.cc | 7 ++- paddle/fluid/operators/pull_box_sparse_op.h | 4 +- paddle/fluid/operators/pull_box_sparse_op.kps | 17 ++---- .../fluid/operators/pull_gpups_sparse_op.cc | 19 ++++-- .../fluid/operators/pull_gpups_sparse_op.cu | 22 ++++--- paddle/fluid/operators/pull_gpups_sparse_op.h | 4 +- paddle/fluid/operators/pull_sparse_op.cc | 6 +- paddle/fluid/operators/pull_sparse_op.h | 4 +- paddle/fluid/operators/pull_sparse_v2_op.cc | 6 +- paddle/fluid/operators/pull_sparse_v2_op.h | 4 +- paddle/fluid/operators/unity_build_rule.cmake | 1 - 59 files changed, 387 insertions(+), 297 deletions(-) diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cc b/paddle/fluid/operators/collective/partial_allgather_op.cc index 
00610768059..7f9e5f3f3e3 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cc @@ -85,9 +85,12 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, ops::PartialAllGatherOpInplaceInferer) -REGISTER_OP_CPU_KERNEL(partial_allgather, - ops::PartialAllGatherOpCPUKernel, - ops::PartialAllGatherOpCPUKernel, - ops::PartialAllGatherOpCPUKernel, - ops::PartialAllGatherOpCPUKernel, - ops::PartialAllGatherOpCPUKernel); +PD_REGISTER_STRUCT_KERNEL(partial_allgather, + CPU, + ALL_LAYOUT, + ops::PartialAllGatherOpCPUKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc index ce5a5438eff..2374f4a4aed 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cu.cc @@ -23,7 +23,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class PartialAllGatherOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -102,12 +102,16 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL(partial_allgather, - ops::PartialAllGatherOpCUDAKernel, +PD_REGISTER_STRUCT_KERNEL(partial_allgather, + GPU, + ALL_LAYOUT, + ops::PartialAllGatherOpCUDAKernel, + float, + double, #if NCCL_VERSION_CODE >= 21000 - ops::PartialAllGatherOpCUDAKernel, + plat::bfloat16, #endif - ops::PartialAllGatherOpCUDAKernel, - ops::PartialAllGatherOpCUDAKernel, - ops::PartialAllGatherOpCUDAKernel, - ops::PartialAllGatherOpCUDAKernel); + int, + int64_t, + plat::float16) { +} diff --git a/paddle/fluid/operators/collective/partial_allgather_op.h b/paddle/fluid/operators/collective/partial_allgather_op.h index 7e9c85214cf..6b827a2656f 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.h +++ b/paddle/fluid/operators/collective/partial_allgather_op.h @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class PartialAllGatherOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/collective/partial_recv_op.cc b/paddle/fluid/operators/collective/partial_recv_op.cc index 14cca68cf16..5cd4a72ea7e 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cc @@ -129,9 +129,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_recv, ops::PartialRecvOp, ops::PartialRecvOpMaker); -REGISTER_OP_CPU_KERNEL(partial_recv, - ops::PartialRecvOpCPUKernel, - ops::PartialRecvOpCPUKernel, - ops::PartialRecvOpCPUKernel, - ops::PartialRecvOpCPUKernel, - ops::PartialRecvOpCPUKernel); +PD_REGISTER_STRUCT_KERNEL(partial_recv, + CPU, + ALL_LAYOUT, + ops::PartialRecvOpCPUKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/partial_recv_op.cu.cc b/paddle/fluid/operators/collective/partial_recv_op.cu.cc index 306175d1ca7..b0df94194e4 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cu.cc @@ -23,7 +23,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -template +template class PartialRecvOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -118,12 +118,16 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL(partial_recv, - ops::PartialRecvOpCUDAKernel, +PD_REGISTER_STRUCT_KERNEL(partial_recv, + GPU, + ALL_LAYOUT, + ops::PartialRecvOpCUDAKernel, + float, + double, #if NCCL_VERSION_CODE >= 21000 - ops::PartialRecvOpCUDAKernel, + plat::bfloat16, #endif - ops::PartialRecvOpCUDAKernel, - ops::PartialRecvOpCUDAKernel, - ops::PartialRecvOpCUDAKernel, - ops::PartialRecvOpCUDAKernel); + int, + int64_t, + plat::float16) { +} diff --git a/paddle/fluid/operators/collective/partial_recv_op.h b/paddle/fluid/operators/collective/partial_recv_op.h index d64fa39939c..fdf3f02b0d6 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.h +++ b/paddle/fluid/operators/collective/partial_recv_op.h @@ -24,7 +24,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class PartialRecvOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/collective/partial_send_op.cc b/paddle/fluid/operators/collective/partial_send_op.cc index a45cc6ddde6..936336ce74a 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cc @@ -94,9 +94,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_send, ops::PartialSendOp, ops::PartialSendMaker); -REGISTER_OP_CPU_KERNEL(partial_send, - ops::PartialSendOpCPUKernel, - ops::PartialSendOpCPUKernel, - ops::PartialSendOpCPUKernel, - ops::PartialSendOpCPUKernel, - ops::PartialSendOpCPUKernel); +PD_REGISTER_STRUCT_KERNEL(partial_send, + CPU, + ALL_LAYOUT, + ops::PartialSendOpCPUKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/collective/partial_send_op.cu.cc b/paddle/fluid/operators/collective/partial_send_op.cu.cc index afac7f963fa..dc24ea01fc9 100644 --- a/paddle/fluid/operators/collective/partial_send_op.cu.cc +++ b/paddle/fluid/operators/collective/partial_send_op.cu.cc @@ -24,7 +24,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class PartialSendCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -117,12 +117,16 @@ class PartialSendCUDAKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL(partial_send, - ops::PartialSendCUDAKernel, - ops::PartialSendCUDAKernel, +PD_REGISTER_STRUCT_KERNEL(partial_send, + GPU, + ALL_LAYOUT, + ops::PartialSendCUDAKernel, + float, + double, #if NCCL_VERSION_CODE >= 21000 - ops::PartialSendCUDAKernel, + plat::bfloat16, #endif - ops::PartialSendCUDAKernel, - ops::PartialSendCUDAKernel, - ops::PartialSendCUDAKernel); + int, + int64_t, + plat::float16) { +} diff --git a/paddle/fluid/operators/collective/partial_send_op.h b/paddle/fluid/operators/collective/partial_send_op.h index 7550ac40078..773125be7d4 100644 --- a/paddle/fluid/operators/collective/partial_send_op.h +++ b/paddle/fluid/operators/collective/partial_send_op.h @@ -25,7 +25,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -template +template class PartialSendOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cc b/paddle/fluid/operators/detection/polygon_box_transform_op.cc index c331cdc97f0..936480a9e23 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cc +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cc @@ -17,7 +17,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class PolygonBoxTransformCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -111,7 +111,10 @@ REGISTER_OPERATOR( ops::PolygonBoxTransformOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL( - polygon_box_transform, - ops::PolygonBoxTransformCPUKernel, - ops::PolygonBoxTransformCPUKernel); + +PD_REGISTER_STRUCT_KERNEL(polygon_box_transform, + CPU, + ALL_LAYOUT, + ops::PolygonBoxTransformCPUKernel, + float, + double) {} diff --git a/paddle/fluid/operators/detection/polygon_box_transform_op.cu b/paddle/fluid/operators/detection/polygon_box_transform_op.cu index de43f2d62b4..4f182464f77 100644 --- a/paddle/fluid/operators/detection/polygon_box_transform_op.cu +++ b/paddle/fluid/operators/detection/polygon_box_transform_op.cu @@ -38,7 +38,7 @@ __global__ void PolygonBoxTransformKernel( } } -template +template class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -73,7 +73,10 @@ class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_CUDA_KERNEL( - polygon_box_transform, - paddle::operators::PolygonBoxTransformOpCUDAKernel, - paddle::operators::PolygonBoxTransformOpCUDAKernel); +namespace ops = paddle::operators; +PD_REGISTER_STRUCT_KERNEL(polygon_box_transform, + GPU, + ALL_LAYOUT, + ops::PolygonBoxTransformOpCUDAKernel, + float, + double) {} diff --git a/paddle/fluid/operators/metrics/precision_recall_op.cc b/paddle/fluid/operators/metrics/precision_recall_op.cc index 0652151320d..413cd854601 100644 --- a/paddle/fluid/operators/metrics/precision_recall_op.cc +++ b/paddle/fluid/operators/metrics/precision_recall_op.cc @@ -242,7 +242,9 @@ REGISTER_OPERATOR( ops::PrecisionRecallOpMaker, paddle::framework::EmptyGradOpMaker, paddle::framework::EmptyGradOpMaker); -REGISTER_OP_CPU_KERNEL( - precision_recall, - ops::PrecisionRecallKernel, - ops::PrecisionRecallKernel); +PD_REGISTER_STRUCT_KERNEL(precision_recall, + CPU, + ALL_LAYOUT, + ops::PrecisionRecallKernel, + float, + double) {} diff --git a/paddle/fluid/operators/metrics/precision_recall_op.h b/paddle/fluid/operators/metrics/precision_recall_op.h index bec8bba09ad..6eef5658c5c 100644 --- a/paddle/fluid/operators/metrics/precision_recall_op.h +++ b/paddle/fluid/operators/metrics/precision_recall_op.h @@ -26,7 +26,7 @@ using EigenMatrix = framework::EigenMatrix; enum StateVariable { TP = 0, FP, TN, FN }; -template +template class PrecisionRecallKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc b/paddle/fluid/operators/nccl/nccl_op.cu.cc index d328329e1c2..7dae16afafd 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cu.cc +++ 
b/paddle/fluid/operators/nccl/nccl_op.cu.cc @@ -52,7 +52,7 @@ static ncclRedOp_t str_to_nccl_red_type(std::string reduction) { return it->second; } -template +template class NCCLAllReduceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -87,7 +87,7 @@ class NCCLAllReduceKernel : public framework::OpKernel { } }; -template +template class NCCLReduceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -128,7 +128,7 @@ class NCCLReduceKernel : public framework::OpKernel { } }; -template +template class NCCLBcastKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -172,6 +172,9 @@ class NCCLBcastKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(ncclAllReduce, ops::NCCLAllReduceKernel); -REGISTER_OP_CUDA_KERNEL(ncclBcast, ops::NCCLBcastKernel); -REGISTER_OP_CUDA_KERNEL(ncclReduce, ops::NCCLReduceKernel); +PD_REGISTER_STRUCT_KERNEL( + ncclAllReduce, GPU, ALL_LAYOUT, ops::NCCLAllReduceKernel, float) {} +PD_REGISTER_STRUCT_KERNEL( + ncclBcast, GPU, ALL_LAYOUT, ops::NCCLBcastKernel, float) {} +PD_REGISTER_STRUCT_KERNEL( + ncclReduce, GPU, ALL_LAYOUT, ops::NCCLReduceKernel, float) {} diff --git a/paddle/fluid/operators/nccl/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl/nccl_op_test.cu.cc index 8d5528716f4..87c0708e12d 100644 --- a/paddle/fluid/operators/nccl/nccl_op_test.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op_test.cu.cc @@ -31,9 +31,12 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" USE_NO_KERNEL_OP(ncclInit); -USE_CUDA_ONLY_OP(ncclAllReduce); -USE_CUDA_ONLY_OP(ncclReduce); -USE_CUDA_ONLY_OP(ncclBcast); +USE_OP_ITSELF(ncclAllReduce); +USE_OP_ITSELF(ncclReduce); +USE_OP_ITSELF(ncclBcast); +PD_DECLARE_KERNEL(ncclAllReduce, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(ncclReduce, GPU, ALL_LAYOUT); +PD_DECLARE_KERNEL(ncclBcast, GPU, ALL_LAYOUT); namespace f = paddle::framework; namespace p = paddle::platform; diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index 286c8512781..9c9055d1987 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -320,9 +320,8 @@ REGISTER_OPERATOR(nce_grad, ops::NCEOpGrad, ops::NCEOpGradVarTypeInference, ops::NCEGradOpNoNeedBufferVarInferer); -REGISTER_OP_CPU_KERNEL(nce, - ops::NCEKernel, - ops::NCEKernel); -REGISTER_OP_CPU_KERNEL(nce_grad, - ops::NCEGradKernel, - ops::NCEGradKernel); + +PD_REGISTER_STRUCT_KERNEL(nce, CPU, ALL_LAYOUT, ops::NCEKernel, float, double) { +} +PD_REGISTER_STRUCT_KERNEL( + nce_grad, CPU, ALL_LAYOUT, ops::NCEGradKernel, float, double) {} diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index 4b9fe86b225..188568ec323 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -75,7 +75,7 @@ void PrepareSamples(const framework::ExecutionContext &context, } } -template +template class NCEKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { @@ -245,7 +245,7 @@ class NCEKernel : public framework::OpKernel { } }; -template +template class NCEGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &context) const override { diff --git a/paddle/fluid/operators/nop_op.cc b/paddle/fluid/operators/nop_op.cc index 
709b1f4f1f0..69f0bfb2abc 100644 --- a/paddle/fluid/operators/nop_op.cc +++ b/paddle/fluid/operators/nop_op.cc @@ -45,7 +45,7 @@ establish the dependency between input and output tensors. } }; -template +template class NopKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override {} @@ -58,8 +58,8 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(nop, ops::NopOp, ops::NopOpMaker); -REGISTER_OP_CPU_KERNEL(nop, ops::NopKernel); +PD_REGISTER_STRUCT_KERNEL(nop, CPU, ALL_LAYOUT, ops::NopKernel, float) {} -REGISTER_OP_CUDA_KERNEL(nop, ops::NopKernel); - -REGISTER_OP_NPU_KERNEL(nop, ops::NopKernel); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PD_REGISTER_STRUCT_KERNEL(nop, GPU, ALL_LAYOUT, ops::NopKernel, float) {} +#endif diff --git a/paddle/fluid/operators/number_count_op.cc b/paddle/fluid/operators/number_count_op.cc index e636bc98bfc..bc566ca5fbf 100644 --- a/paddle/fluid/operators/number_count_op.cc +++ b/paddle/fluid/operators/number_count_op.cc @@ -58,10 +58,9 @@ class NumberCountOpMaker : public framework::OpProtoAndCheckerMaker { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CPU_KERNEL(number_count, - ops::NumberCountOpCPUKernel, - ops::NumberCountOpCPUKernel); - REGISTER_OP_WITHOUT_GRADIENT(number_count, ops::NumberCountOp, ops::NumberCountOpMaker); + +PD_REGISTER_STRUCT_KERNEL( + number_count, CPU, ALL_LAYOUT, ops::NumberCountOpCPUKernel, int, int64_t) {} diff --git a/paddle/fluid/operators/number_count_op.cu b/paddle/fluid/operators/number_count_op.cu index fdab0369871..b9afffd7887 100644 --- a/paddle/fluid/operators/number_count_op.cu +++ b/paddle/fluid/operators/number_count_op.cu @@ -79,7 +79,7 @@ __global__ void NumberCount(const T* numbers, } } -template +template class NumberCountOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -111,4 +111,5 @@ class NumberCountOpCUDAKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL(number_count, ops::NumberCountOpCUDAKernel); +PD_REGISTER_STRUCT_KERNEL( + number_count, GPU, ALL_LAYOUT, ops::NumberCountOpCUDAKernel, int64_t) {} diff --git a/paddle/fluid/operators/number_count_op.h b/paddle/fluid/operators/number_count_op.h index ded7ea6eec5..e95336ae2a3 100644 --- a/paddle/fluid/operators/number_count_op.h +++ b/paddle/fluid/operators/number_count_op.h @@ -24,7 +24,7 @@ namespace paddle { namespace operators { -template +template class NumberCountOpCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc index 076f5137cab..3261e96cbbe 100644 --- a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cc @@ -133,5 +133,5 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(proximal_adagrad, ops::ProximalAdagradOp, ops::ProximalAdagradOpMaker); -REGISTER_OP_CPU_KERNEL(proximal_adagrad, - ops::ProximalAdagradOpKernel); +PD_REGISTER_STRUCT_KERNEL( + proximal_adagrad, CPU, ALL_LAYOUT, ops::ProximalAdagradOpKernel, float) {} diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu index c338f4cc717..0a79dcd425f 100644 --- 
a/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.cu @@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/optimizers/proximal_adagrad_op.h" namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(proximal_adagrad, - ops::ProximalAdagradOpKernel); +PD_REGISTER_STRUCT_KERNEL( + proximal_adagrad, GPU, ALL_LAYOUT, ops::ProximalAdagradOpKernel, float) {} diff --git a/paddle/fluid/operators/optimizers/proximal_adagrad_op.h b/paddle/fluid/operators/optimizers/proximal_adagrad_op.h index 72eccd17e44..973d870d14f 100644 --- a/paddle/fluid/operators/optimizers/proximal_adagrad_op.h +++ b/paddle/fluid/operators/optimizers/proximal_adagrad_op.h @@ -19,7 +19,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class ProximalAdagradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cc b/paddle/fluid/operators/optimizers/proximal_gd_op.cc index d7e01aa0710..08cc29ce9eb 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cc +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cc @@ -106,5 +106,6 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(proximal_gd, ops::ProximalGDOp, ops::ProximalGDOpMaker); -REGISTER_OP_CPU_KERNEL(proximal_gd, - ops::ProximalGDOpKernel); + +PD_REGISTER_STRUCT_KERNEL( + proximal_gd, CPU, ALL_LAYOUT, ops::ProximalGDOpKernel, float) {} diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.cu b/paddle/fluid/operators/optimizers/proximal_gd_op.cu index edc911134c7..ef1edfc2ee4 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.cu +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.cu @@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/optimizers/proximal_gd_op.h" namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(proximal_gd, - ops::ProximalGDOpKernel); +PD_REGISTER_STRUCT_KERNEL( + proximal_gd, GPU, ALL_LAYOUT, ops::ProximalGDOpKernel, float) {} diff --git a/paddle/fluid/operators/optimizers/proximal_gd_op.h b/paddle/fluid/operators/optimizers/proximal_gd_op.h index 49cf7b68bd3..1945ef5bf6b 100644 --- a/paddle/fluid/operators/optimizers/proximal_gd_op.h +++ b/paddle/fluid/operators/optimizers/proximal_gd_op.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -template +template class ProximalGDOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 91eeed0e900..e29981d35b4 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -402,7 +402,7 @@ static inline void GetPaddings(int* paddings, } } -template +template class Pad2dCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -520,7 +520,7 @@ class Pad2dCPUKernel : public framework::OpKernel { } }; -template +template class Pad2dGradCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -873,11 +873,8 @@ REGISTER_OPERATOR(pad2d, REGISTER_OPERATOR(pad2d_grad, ops::Pad2dOpGrad, ops::Pad2dOpGradNoNeedBufferVarsInferer); -REGISTER_OP_CPU_KERNEL(pad2d, - ops::Pad2dCPUKernel, - ops::Pad2dCPUKernel, - ops::Pad2dCPUKernel, - ops::Pad2dCPUKernel); -REGISTER_OP_CPU_KERNEL(pad2d_grad, - ops::Pad2dGradCPUKernel, - ops::Pad2dGradCPUKernel); + +PD_REGISTER_STRUCT_KERNEL( + pad2d, CPU, ALL_LAYOUT, ops::Pad2dCPUKernel, float, double, int, int64_t) {} +PD_REGISTER_STRUCT_KERNEL( + pad2d_grad, CPU, ALL_LAYOUT, ops::Pad2dGradCPUKernel, float, double) {} diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu index 7b0dd2149de..b8263ea6bb1 100644 --- a/paddle/fluid/operators/pad2d_op.cu +++ b/paddle/fluid/operators/pad2d_op.cu @@ -361,7 +361,7 @@ static inline void GetPaddings(int* paddings, } } -template +template class Pad2dCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -489,7 +489,7 @@ class Pad2dCUDAKernel : public framework::OpKernel { } }; -template +template class Pad2dGradCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -618,13 +618,19 @@ class Pad2dGradCUDAKernel : public framework::OpKernel { namespace ops = paddle::operators; namespace plat = paddle::platform; -REGISTER_OP_CUDA_KERNEL(pad2d, - ops::Pad2dCUDAKernel, - ops::Pad2dCUDAKernel, - ops::Pad2dCUDAKernel, - ops::Pad2dCUDAKernel, - ops::Pad2dCUDAKernel); -REGISTER_OP_CUDA_KERNEL(pad2d_grad, - ops::Pad2dGradCUDAKernel, - ops::Pad2dGradCUDAKernel, - ops::Pad2dGradCUDAKernel); +PD_REGISTER_STRUCT_KERNEL(pad2d, + GPU, + ALL_LAYOUT, + ops::Pad2dCUDAKernel, + float, + double, + int, + int64_t, + plat::float16) {} +PD_REGISTER_STRUCT_KERNEL(pad2d_grad, + GPU, + ALL_LAYOUT, + ops::Pad2dGradCUDAKernel, + float, + double, + plat::float16) {} diff --git a/paddle/fluid/operators/pad_constant_like_op.cc b/paddle/fluid/operators/pad_constant_like_op.cc index 9b08bb3fc1e..d00cefab450 100644 --- a/paddle/fluid/operators/pad_constant_like_op.cc +++ b/paddle/fluid/operators/pad_constant_like_op.cc @@ -243,26 +243,38 @@ REGISTER_OPERATOR(pad_constant_like, ops::PadConstantLikeOpGradMaker); REGISTER_OPERATOR(pad_constant_like_grad, ops::PadConstantLikeOpGrad); -REGISTER_OP_CPU_KERNEL(pad_constant_like, - ops::PadConstantLikeKernel, - ops::PadConstantLikeKernel, - ops::PadConstantLikeKernel, - ops::PadConstantLikeKernel); -REGISTER_OP_CPU_KERNEL( - pad_constant_like_grad, - ops::PadConstantLikeGradKernel, - ops::PadConstantLikeGradKernel, - ops::PadConstantLikeGradKernel, - ops::PadConstantLikeGradKernel); - 
-REGISTER_OP_CUDA_KERNEL(pad_constant_like, - ops::PadConstantLikeKernel, - ops::PadConstantLikeKernel, - ops::PadConstantLikeKernel, - ops::PadConstantLikeKernel); -REGISTER_OP_CUDA_KERNEL( - pad_constant_like_grad, - ops::PadConstantLikeGradKernel, - ops::PadConstantLikeGradKernel, - ops::PadConstantLikeGradKernel, - ops::PadConstantLikeGradKernel); +PD_REGISTER_STRUCT_KERNEL(pad_constant_like, + CPU, + ALL_LAYOUT, + ops::PadConstantLikeKernel, + float, + double, + int, + int64_t) {} +PD_REGISTER_STRUCT_KERNEL(pad_constant_like_grad, + CPU, + ALL_LAYOUT, + ops::PadConstantLikeGradKernel, + float, + double, + int, + int64_t) {} + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PD_REGISTER_STRUCT_KERNEL(pad_constant_like, + GPU, + ALL_LAYOUT, + ops::PadConstantLikeKernel, + float, + double, + int, + int64_t) {} +PD_REGISTER_STRUCT_KERNEL(pad_constant_like_grad, + GPU, + ALL_LAYOUT, + ops::PadConstantLikeGradKernel, + float, + double, + int, + int64_t) {} +#endif diff --git a/paddle/fluid/operators/pad_constant_like_op.h b/paddle/fluid/operators/pad_constant_like_op.h index ba87bd3ef18..f6162037fbd 100644 --- a/paddle/fluid/operators/pad_constant_like_op.h +++ b/paddle/fluid/operators/pad_constant_like_op.h @@ -26,7 +26,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class PadConstantLikeKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -61,7 +61,7 @@ class PadConstantLikeKernel : public framework::OpKernel { } }; -template +template class PadConstantLikeGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index 1fb9dceb415..f2f3da9f051 100644 --- a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -202,14 +202,19 @@ REGISTER_OPERATOR(partial_concat, REGISTER_OPERATOR(partial_concat_grad, ops::PartialConcatGradOp); -REGISTER_OP_CPU_KERNEL(partial_concat, - ops::PartialConcatKernel, - ops::PartialConcatKernel, - ops::PartialConcatKernel, - ops::PartialConcatKernel); - -REGISTER_OP_CPU_KERNEL(partial_concat_grad, - ops::PartialConcatGradientOpKernel, - ops::PartialConcatGradientOpKernel, - ops::PartialConcatGradientOpKernel, - ops::PartialConcatGradientOpKernel); +PD_REGISTER_STRUCT_KERNEL(partial_concat, + CPU, + ALL_LAYOUT, + ops::PartialConcatKernel, + float, + double, + int, + int64_t) {} +PD_REGISTER_STRUCT_KERNEL(partial_concat_grad, + CPU, + ALL_LAYOUT, + ops::PartialConcatGradientOpKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/fluid/operators/partial_concat_op.cu b/paddle/fluid/operators/partial_concat_op.cu index f4acf68dcbc..ffef094fa96 100644 --- a/paddle/fluid/operators/partial_concat_op.cu +++ b/paddle/fluid/operators/partial_concat_op.cu @@ -65,7 +65,7 @@ __global__ void ConcatPartialGradCUDAKernel(T **in, } } -template +template class PartialConcatOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -146,7 +146,7 @@ class PartialConcatOpCUDAKernel : public framework::OpKernel { } }; -template +template class PartialConcatGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -231,16 +231,22 @@ class PartialConcatGradOpCUDAKernel : public framework::OpKernel { } 
// namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(partial_concat, - ops::PartialConcatOpCUDAKernel, - ops::PartialConcatOpCUDAKernel, - ops::PartialConcatOpCUDAKernel, - ops::PartialConcatOpCUDAKernel, - ops::PartialConcatOpCUDAKernel); - -REGISTER_OP_CUDA_KERNEL(partial_concat_grad, - ops::PartialConcatGradOpCUDAKernel, - ops::PartialConcatGradOpCUDAKernel, - ops::PartialConcatGradOpCUDAKernel, - ops::PartialConcatGradOpCUDAKernel, - ops::PartialConcatGradOpCUDAKernel); + +PD_REGISTER_STRUCT_KERNEL(partial_concat, + GPU, + ALL_LAYOUT, + ops::PartialConcatOpCUDAKernel, + float, + double, + int, + int64_t, + plat::float16) {} +PD_REGISTER_STRUCT_KERNEL(partial_concat_grad, + GPU, + ALL_LAYOUT, + ops::PartialConcatGradOpCUDAKernel, + float, + double, + int, + int64_t, + plat::float16) {} diff --git a/paddle/fluid/operators/partial_concat_op.h b/paddle/fluid/operators/partial_concat_op.h index 407b57e3a82..fb0d17aa97b 100644 --- a/paddle/fluid/operators/partial_concat_op.h +++ b/paddle/fluid/operators/partial_concat_op.h @@ -39,7 +39,7 @@ static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) { return start_index; } -template +template class PartialConcatKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -84,7 +84,7 @@ class PartialConcatKernel : public framework::OpKernel { } }; -template +template class PartialConcatGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index 9ef7ac0a21a..4b130306825 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -204,14 +204,19 @@ REGISTER_OPERATOR(partial_sum, REGISTER_OPERATOR(partial_sum_grad, ops::PartialSumGradOp); -REGISTER_OP_CPU_KERNEL(partial_sum, - ops::PartialSumKernel, - ops::PartialSumKernel, - ops::PartialSumKernel, - ops::PartialSumKernel); - -REGISTER_OP_CPU_KERNEL(partial_sum_grad, - ops::PartialSumGradientOpKernel, - ops::PartialSumGradientOpKernel, - ops::PartialSumGradientOpKernel, - ops::PartialSumGradientOpKernel); +PD_REGISTER_STRUCT_KERNEL(partial_sum, + CPU, + ALL_LAYOUT, + ops::PartialSumKernel, + float, + double, + int, + int64_t) {} +PD_REGISTER_STRUCT_KERNEL(partial_sum_grad, + CPU, + ALL_LAYOUT, + ops::PartialSumGradientOpKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/fluid/operators/partial_sum_op.cu b/paddle/fluid/operators/partial_sum_op.cu index 093e0032b3c..a38ec4c8394 100644 --- a/paddle/fluid/operators/partial_sum_op.cu +++ b/paddle/fluid/operators/partial_sum_op.cu @@ -70,7 +70,7 @@ __global__ void PartialSumGradCUDAKernel(T **res_grad, } } -template +template class PartialSumOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -144,7 +144,7 @@ class PartialSumOpCUDAKernel : public framework::OpKernel { } }; -template +template class PartialSumGradOpCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -233,18 +233,3 @@ class PartialSumGradOpCUDAKernel : public framework::OpKernel { } // namespace operators } // namespace paddle - -namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(partial_sum, - ops::PartialSumOpCUDAKernel, - ops::PartialSumOpCUDAKernel, - ops::PartialSumOpCUDAKernel, - 
ops::PartialSumOpCUDAKernel, - ops::PartialSumOpCUDAKernel); - -REGISTER_OP_CUDA_KERNEL(partial_sum_grad, - ops::PartialSumGradOpCUDAKernel, - ops::PartialSumGradOpCUDAKernel, - ops::PartialSumGradOpCUDAKernel, - ops::PartialSumGradOpCUDAKernel, - ops::PartialSumGradOpCUDAKernel); diff --git a/paddle/fluid/operators/partial_sum_op.h b/paddle/fluid/operators/partial_sum_op.h index fa4cc19d5e2..1b88eafae77 100644 --- a/paddle/fluid/operators/partial_sum_op.h +++ b/paddle/fluid/operators/partial_sum_op.h @@ -21,7 +21,7 @@ limitations under the License. */ namespace paddle { namespace operators { -template +template class PartialSumKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -57,7 +57,7 @@ class PartialSumKernel : public framework::OpKernel { } }; -template +template class PartialSumGradientOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/positive_negative_pair_op.cc b/paddle/fluid/operators/positive_negative_pair_op.cc index 3f4d8125671..72236c012c3 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.cc +++ b/paddle/fluid/operators/positive_negative_pair_op.cc @@ -253,7 +253,10 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair, ops::PositiveNegativePairOp, ops::PositiveNegativePairOpMaker); -REGISTER_OP_CPU_KERNEL( - positive_negative_pair, - ops::PositiveNegativePairKernel, - ops::PositiveNegativePairKernel); + +PD_REGISTER_STRUCT_KERNEL(positive_negative_pair, + CPU, + ALL_LAYOUT, + ops::PositiveNegativePairKernel, + float, + double) {} diff --git a/paddle/fluid/operators/positive_negative_pair_op.h b/paddle/fluid/operators/positive_negative_pair_op.h index 745b793f511..0cddbcc3abf 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.h +++ b/paddle/fluid/operators/positive_negative_pair_op.h @@ -19,7 +19,7 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -template +template class PositiveNegativePairKernel : public framework::OpKernel { public: struct PredictionResult { diff --git a/paddle/fluid/operators/prroi_pool_op.cc b/paddle/fluid/operators/prroi_pool_op.cc index d1c455331b4..0f0dbf3c688 100644 --- a/paddle/fluid/operators/prroi_pool_op.cc +++ b/paddle/fluid/operators/prroi_pool_op.cc @@ -195,13 +195,20 @@ REGISTER_OPERATOR(prroi_pool, ops::PRROIPoolGradMaker, ops::PRROIPoolGradMaker); REGISTER_OPERATOR(prroi_pool_grad, ops::PRROIPoolGradOp); -REGISTER_OP_CPU_KERNEL(prroi_pool, - ops::CPUPRROIPoolOpKernel, - ops::CPUPRROIPoolOpKernel, - ops::CPUPRROIPoolOpKernel, - ops::CPUPRROIPoolOpKernel); -REGISTER_OP_CPU_KERNEL(prroi_pool_grad, - ops::CPUPRROIPoolGradOpKernel, - ops::CPUPRROIPoolGradOpKernel, - ops::CPUPRROIPoolGradOpKernel, - ops::CPUPRROIPoolGradOpKernel); + +PD_REGISTER_STRUCT_KERNEL(prroi_pool, + CPU, + ALL_LAYOUT, + ops::CPUPRROIPoolOpKernel, + float, + double, + int, + int64_t) {} +PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad, + CPU, + ALL_LAYOUT, + ops::CPUPRROIPoolGradOpKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/fluid/operators/prroi_pool_op.cu b/paddle/fluid/operators/prroi_pool_op.cu index d1aa1d37d04..5d124396427 100644 --- a/paddle/fluid/operators/prroi_pool_op.cu +++ b/paddle/fluid/operators/prroi_pool_op.cu @@ -211,7 +211,7 @@ __global__ void GPUPRROIPoolBackward(const int nthreads, } } -template +template class GPUPRROIPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -314,7 +314,7 @@ class GPUPRROIPoolOpKernel : public framework::OpKernel { } }; -template +template class GPUPRROIPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -428,9 +428,12 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(prroi_pool, - ops::GPUPRROIPoolOpKernel, - ops::GPUPRROIPoolOpKernel); -REGISTER_OP_CUDA_KERNEL(prroi_pool_grad, - ops::GPUPRROIPoolGradOpKernel, - ops::GPUPRROIPoolGradOpKernel); + +PD_REGISTER_STRUCT_KERNEL( + prroi_pool, GPU, ALL_LAYOUT, ops::GPUPRROIPoolOpKernel, float, double) {} +PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad, + GPU, + ALL_LAYOUT, + ops::GPUPRROIPoolGradOpKernel, + float, + double) {} diff --git a/paddle/fluid/operators/prroi_pool_op.h b/paddle/fluid/operators/prroi_pool_op.h index 07a2bde7e94..e2417a071ce 100644 --- a/paddle/fluid/operators/prroi_pool_op.h +++ b/paddle/fluid/operators/prroi_pool_op.h @@ -327,7 +327,7 @@ inline HOSTDEVICE void PrRoIPoolingCoorBackward(int s_w, (*this_out_grad)); } -template +template class CPUPRROIPoolOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -481,7 +481,7 @@ class CPUPRROIPoolOpKernel : public framework::OpKernel { } }; -template +template class CPUPRROIPoolGradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cc b/paddle/fluid/operators/prune_gate_by_capacity_op.cc index 388b65f3dd6..c1112b13feb 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cc +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.cc @@ -126,7 +126,9 @@ REGISTER_OP_WITHOUT_GRADIENT(prune_gate_by_capacity, ops::PruneGateByCapacityOp, ops::PruneGateByCapacityOpMaker); 
-REGISTER_OP_CPU_KERNEL( - prune_gate_by_capacity, - ops::PruneGateByCapacityCPUKernel, - ops::PruneGateByCapacityCPUKernel); +PD_REGISTER_STRUCT_KERNEL(prune_gate_by_capacity, + CPU, + ALL_LAYOUT, + ops::PruneGateByCapacityCPUKernel, + int, + int64_t) {} diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cu b/paddle/fluid/operators/prune_gate_by_capacity_op.cu index 38baaeb809c..510de11029f 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cu +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.cu @@ -105,7 +105,7 @@ static void VisitDataType(phi::DataType type, Visitor visitor) { } } -template +template class PruneGateByCapacityCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { @@ -127,6 +127,8 @@ class PruneGateByCapacityCUDAKernel : public framework::OpKernel { } // namespace operators } // namespace paddle -REGISTER_OP_CUDA_KERNEL( - prune_gate_by_capacity, - ops::PruneGateByCapacityCUDAKernel); +PD_REGISTER_STRUCT_KERNEL(prune_gate_by_capacity, + GPU, + ALL_LAYOUT, + ops::PruneGateByCapacityCUDAKernel, + int64_t) {} diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.h b/paddle/fluid/operators/prune_gate_by_capacity_op.h index d7a00bd40d7..4420fae6ef5 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.h +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.h @@ -20,7 +20,7 @@ namespace paddle { namespace operators { -template +template class PruneGateByCapacityCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.cc b/paddle/fluid/operators/pull_box_extended_sparse_op.cc index 7b949fa4338..f0799f75862 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.cc +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.cc @@ -151,10 +151,15 @@ REGISTER_OPERATOR( REGISTER_OPERATOR(push_box_extended_sparse, ops::PushBoxExtendedSparseOp); -REGISTER_OP_CPU_KERNEL(pull_box_extended_sparse, - ops::PullBoxExtendedSparseCPUKernel, - ops::PullBoxExtendedSparseCPUKernel); - -REGISTER_OP_CPU_KERNEL(push_box_extended_sparse, - ops::PushBoxExtendedSparseCPUKernel, - ops::PushBoxExtendedSparseCPUKernel); +PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse, + CPU, + ALL_LAYOUT, + ops::PullBoxExtendedSparseCPUKernel, + float, + double) {} +PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse, + CPU, + ALL_LAYOUT, + ops::PushBoxExtendedSparseCPUKernel, + float, + double) {} diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.cu b/paddle/fluid/operators/pull_box_extended_sparse_op.cu index cfa317a3d39..570c367c931 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.cu +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.cu @@ -19,7 +19,7 @@ namespace paddle { namespace operators { -template +template class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -27,7 +27,7 @@ class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel { } }; -template +template class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -38,9 +38,16 @@ class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(pull_box_extended_sparse, - 
ops::PullBoxExtendedSparseCUDAKernel, - ops::PullBoxExtendedSparseCUDAKernel); -REGISTER_OP_CUDA_KERNEL(push_box_extended_sparse, - ops::PushBoxExtendedSparseCUDAKernel, - ops::PushBoxExtendedSparseCUDAKernel); + +PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse, + GPU, + ALL_LAYOUT, + ops::PullBoxExtendedSparseCUDAKernel, + float, + double) {} +PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse, + GPU, + ALL_LAYOUT, + ops::PushBoxExtendedSparseCUDAKernel, + float, + double) {} diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.h b/paddle/fluid/operators/pull_box_extended_sparse_op.h index eff3bfd2a5f..b9508a27950 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.h +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.h @@ -108,7 +108,7 @@ static void PushBoxExtendedSparseFunctor( #endif } -template +template class PullBoxExtendedSparseCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -116,7 +116,7 @@ class PullBoxExtendedSparseCPUKernel : public framework::OpKernel { } }; -template +template class PushBoxExtendedSparseCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { diff --git a/paddle/fluid/operators/pull_box_sparse_op.cc b/paddle/fluid/operators/pull_box_sparse_op.cc index c58a176d526..a8f91c85485 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.cc +++ b/paddle/fluid/operators/pull_box_sparse_op.cc @@ -135,5 +135,8 @@ REGISTER_OPERATOR(pull_box_sparse, ops::PushBoxSparseOpMaker, ops::PushBoxSparseOpMaker); REGISTER_OPERATOR(push_box_sparse, ops::PushBoxSparseOp); -REGISTER_OP_CPU_KERNEL(pull_box_sparse, ops::PullBoxSparseKernel); -REGISTER_OP_CPU_KERNEL(push_box_sparse, ops::PushBoxSparseKernel); + +PD_REGISTER_STRUCT_KERNEL( + pull_box_sparse, CPU, ALL_LAYOUT, ops::PullBoxSparseKernel, float) {} +PD_REGISTER_STRUCT_KERNEL( + push_box_sparse, CPU, ALL_LAYOUT, ops::PushBoxSparseKernel, float) {} diff --git a/paddle/fluid/operators/pull_box_sparse_op.h b/paddle/fluid/operators/pull_box_sparse_op.h index dd41fd6ff0f..1ebfa11a2b2 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.h +++ b/paddle/fluid/operators/pull_box_sparse_op.h @@ -113,7 +113,7 @@ static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) { #endif } -template +template class PullBoxSparseKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -121,7 +121,7 @@ class PullBoxSparseKernel : public framework::OpKernel { } }; -template +template class PushBoxSparseKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { diff --git a/paddle/fluid/operators/pull_box_sparse_op.kps b/paddle/fluid/operators/pull_box_sparse_op.kps index 4b0580c5e1a..1e4a3640bda 100644 --- a/paddle/fluid/operators/pull_box_sparse_op.kps +++ b/paddle/fluid/operators/pull_box_sparse_op.kps @@ -45,16 +45,7 @@ limitations under the License. 
*/ namespace ops = paddle::operators; namespace plat = paddle::platform; -#ifdef PADDLE_WITH_XPU_KP -REGISTER_OP_KERNEL(pull_box_sparse, - KP, - plat::XPUPlace, - ops::PullBoxSparseKernel); -REGISTER_OP_KERNEL(push_box_sparse, - KP, - plat::XPUPlace, - ops::PushBoxSparseKernel); -#else -REGISTER_OP_CUDA_KERNEL(pull_box_sparse, ops::PullBoxSparseKernel); -REGISTER_OP_CUDA_KERNEL(push_box_sparse, ops::PushBoxSparseKernel); -#endif +PD_REGISTER_STRUCT_KERNEL( + pull_box_sparse, KPS, ALL_LAYOUT, ops::PullBoxSparseKernel, float) {} +PD_REGISTER_STRUCT_KERNEL( + push_box_sparse, KPS, ALL_LAYOUT, ops::PushBoxSparseKernel, float) {} diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.cc b/paddle/fluid/operators/pull_gpups_sparse_op.cc index 821cfdab6f1..afaa9af3fda 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.cc +++ b/paddle/fluid/operators/pull_gpups_sparse_op.cc @@ -145,9 +145,16 @@ REGISTER_OPERATOR(pull_gpups_sparse, ops::PushGpuPSSparseOpMaker, ops::PushGpuPSSparseOpMaker); REGISTER_OPERATOR(push_gpups_sparse, ops::PushGpuPSSparseOp); -REGISTER_OP_CPU_KERNEL(pull_gpups_sparse, - ops::PullGpuPSSparseCPUKernel, - ops::PullGpuPSSparseCPUKernel) -REGISTER_OP_CPU_KERNEL(push_gpups_sparse, - ops::PushGpuPSSparseCPUKernel, - ops::PushGpuPSSparseCPUKernel) + +PD_REGISTER_STRUCT_KERNEL(pull_gpups_sparse, + CPU, + ALL_LAYOUT, + ops::PullGpuPSSparseCPUKernel, + float, + double) {} +PD_REGISTER_STRUCT_KERNEL(push_gpups_sparse, + CPU, + ALL_LAYOUT, + ops::PushGpuPSSparseCPUKernel, + float, + double) {} diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.cu b/paddle/fluid/operators/pull_gpups_sparse_op.cu index ff68c42c8eb..a936d810216 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.cu +++ b/paddle/fluid/operators/pull_gpups_sparse_op.cu @@ -20,7 +20,7 @@ namespace paddle { namespace operators { using phi::PADDLE_CUDA_NUM_THREADS; -template +template class PullGpuPSSparseCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -28,7 +28,7 @@ class PullGpuPSSparseCUDAKernel : public framework::OpKernel { } }; -template +template class PushGpuPSSparseCUDAKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -39,9 +39,15 @@ class PushGpuPSSparseCUDAKernel : public framework::OpKernel { } // namespace paddle namespace ops = paddle::operators; -REGISTER_OP_CUDA_KERNEL(pull_gpups_sparse, - ops::PullGpuPSSparseCUDAKernel, - ops::PullGpuPSSparseCUDAKernel) -REGISTER_OP_CUDA_KERNEL(push_gpups_sparse, - ops::PushGpuPSSparseCUDAKernel, - ops::PushGpuPSSparseCUDAKernel) +PD_REGISTER_STRUCT_KERNEL(pull_gpups_sparse, + GPU, + ALL_LAYOUT, + ops::PullGpuPSSparseCUDAKernel, + float, + double) {} +PD_REGISTER_STRUCT_KERNEL(push_gpups_sparse, + GPU, + ALL_LAYOUT, + ops::PushGpuPSSparseCUDAKernel, + float, + double) {} diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.h b/paddle/fluid/operators/pull_gpups_sparse_op.h index 2d844a4ce2b..d8fdadd99cb 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.h +++ b/paddle/fluid/operators/pull_gpups_sparse_op.h @@ -97,7 +97,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) { #endif } -template +template class PullGpuPSSparseCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { @@ -105,7 +105,7 @@ class PullGpuPSSparseCPUKernel : public framework::OpKernel { } }; -template +template class PushGpuPSSparseCPUKernel : public 
framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { diff --git a/paddle/fluid/operators/pull_sparse_op.cc b/paddle/fluid/operators/pull_sparse_op.cc index 7dc9ae98e0e..4850bf33ae8 100644 --- a/paddle/fluid/operators/pull_sparse_op.cc +++ b/paddle/fluid/operators/pull_sparse_op.cc @@ -143,5 +143,7 @@ REGISTER_OPERATOR(pull_sparse, ops::PushSparseOpMaker, ops::PushSparseOpMaker); REGISTER_OPERATOR(push_sparse, ops::PushSparseOp); -REGISTER_OP_CPU_KERNEL(pull_sparse, ops::PullSparseCPUKernel) -REGISTER_OP_CPU_KERNEL(push_sparse, ops::PushSparseCPUKernel) +PD_REGISTER_STRUCT_KERNEL( + pull_sparse, CPU, ALL_LAYOUT, ops::PullSparseCPUKernel, float) {} +PD_REGISTER_STRUCT_KERNEL( + push_sparse, CPU, ALL_LAYOUT, ops::PushSparseCPUKernel, float) {} diff --git a/paddle/fluid/operators/pull_sparse_op.h b/paddle/fluid/operators/pull_sparse_op.h index ecc3a5e1021..263511b6518 100644 --- a/paddle/fluid/operators/pull_sparse_op.h +++ b/paddle/fluid/operators/pull_sparse_op.h @@ -66,7 +66,7 @@ void PushSparseFunctor(const framework::ExecutionContext& ctx) { &grads); } -template +template class PullSparseCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -74,7 +74,7 @@ class PullSparseCPUKernel : public framework::OpKernel { } }; -template +template class PushSparseCPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/pull_sparse_v2_op.cc b/paddle/fluid/operators/pull_sparse_v2_op.cc index 88a0ac86c25..993950c360c 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.cc +++ b/paddle/fluid/operators/pull_sparse_v2_op.cc @@ -135,5 +135,7 @@ REGISTER_OPERATOR(pull_sparse_v2, ops::PushSparseV2OpMaker, ops::PushSparseV2OpMaker); REGISTER_OPERATOR(push_sparse_v2, ops::PushSparseV2Op); -REGISTER_OP_CPU_KERNEL(pull_sparse_v2, ops::PullSparseV2CPUKernel) -REGISTER_OP_CPU_KERNEL(push_sparse_v2, ops::PushSparseV2CPUKernel) +PD_REGISTER_STRUCT_KERNEL( + pull_sparse_v2, CPU, ALL_LAYOUT, ops::PullSparseV2CPUKernel, float) {} +PD_REGISTER_STRUCT_KERNEL( + push_sparse_v2, CPU, ALL_LAYOUT, ops::PushSparseV2CPUKernel, float) {} diff --git a/paddle/fluid/operators/pull_sparse_v2_op.h b/paddle/fluid/operators/pull_sparse_v2_op.h index c24d0a4f338..95ce7183857 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.h +++ b/paddle/fluid/operators/pull_sparse_v2_op.h @@ -25,7 +25,7 @@ namespace paddle { namespace operators { -template +template class PullSparseV2CPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { @@ -33,7 +33,7 @@ class PullSparseV2CPUKernel : public framework::OpKernel { } }; -template +template class PushSparseV2CPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/unity_build_rule.cmake b/paddle/fluid/operators/unity_build_rule.cmake index 8f9a2f92814..7ca431e8ea5 100644 --- a/paddle/fluid/operators/unity_build_rule.cmake +++ b/paddle/fluid/operators/unity_build_rule.cmake @@ -202,7 +202,6 @@ register_unity_group( pad_op.cc) register_unity_group( cc - modified_huber_loss_op.cc partial_sum_op.cc pixel_shuffle_op.cc pool_op.cc -- GitLab
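For context, every file in this patch applies the same mechanical change: the kernel class gains a DeviceContext template parameter (the template argument lists do not render in the diff above, so the repeated "-template +template" hunks are, by the pattern this series follows, a change from "template <typename T>" to "template <typename T, typename DeviceContext>"), and the per-dtype REGISTER_OP_CPU_KERNEL / REGISTER_OP_CUDA_KERNEL calls are replaced by a single PD_REGISTER_STRUCT_KERNEL that names the backend, layout, kernel class, and supported dtypes. The sketch below is illustrative only, not code from this patch: "my_op" and MyOpCUDAKernel are hypothetical names, and it assumes the macros are available through the usual fluid op_registry header as they are for the files touched here.

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

// After the migration: the kernel is templated on both the element type
// and the DeviceContext, as required by the struct-kernel registration.
template <typename T, typename DeviceContext>
class MyOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    // kernel body elided
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

// Old style (removed throughout this patch): one entry per dtype.
// REGISTER_OP_CUDA_KERNEL(my_op,
//                         ops::MyOpCUDAKernel<float>,
//                         ops::MyOpCUDAKernel<double>);

// New style: backend, layout, kernel class, then the dtype list; the
// trailing {} is the (empty) body the macro expects.
PD_REGISTER_STRUCT_KERNEL(
    my_op, GPU, ALL_LAYOUT, ops::MyOpCUDAKernel, float, double) {}

The same shape appears with CPU and KPS backends in the patch, and tests that previously pulled kernels in with USE_CUDA_ONLY_OP now pair USE_OP_ITSELF with PD_DECLARE_KERNEL(op, GPU, ALL_LAYOUT), as in nccl_op_test.cu.cc above.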