未验证 提交 aa35331f 编写于 作者: H huangjiyi 提交者: GitHub

register fluid kernels to phi [part 7] (#52577)

* update

* fix bug

* fix ci-windows-openblas

* fix test_partial_sum_op

* fix codestyle
上级 6913feb0
...@@ -85,9 +85,12 @@ REGISTER_OPERATOR( ...@@ -85,9 +85,12 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>, paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
ops::PartialAllGatherOpInplaceInferer) ops::PartialAllGatherOpInplaceInferer)
REGISTER_OP_CPU_KERNEL(partial_allgather, PD_REGISTER_STRUCT_KERNEL(partial_allgather,
ops::PartialAllGatherOpCPUKernel<float>, CPU,
ops::PartialAllGatherOpCPUKernel<double>, ALL_LAYOUT,
ops::PartialAllGatherOpCPUKernel<int>, ops::PartialAllGatherOpCPUKernel,
ops::PartialAllGatherOpCPUKernel<int64_t>, float,
ops::PartialAllGatherOpCPUKernel<plat::float16>); double,
int,
int64_t,
plat::float16) {}
...@@ -23,7 +23,7 @@ limitations under the License. */ ...@@ -23,7 +23,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PartialAllGatherOpCUDAKernel : public framework::OpKernel<T> { class PartialAllGatherOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -102,12 +102,16 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel<T> { ...@@ -102,12 +102,16 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(partial_allgather, PD_REGISTER_STRUCT_KERNEL(partial_allgather,
ops::PartialAllGatherOpCUDAKernel<float>, GPU,
ALL_LAYOUT,
ops::PartialAllGatherOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000 #if NCCL_VERSION_CODE >= 21000
ops::PartialAllGatherOpCUDAKernel<plat::bfloat16>, plat::bfloat16,
#endif #endif
ops::PartialAllGatherOpCUDAKernel<double>, int,
ops::PartialAllGatherOpCUDAKernel<int>, int64_t,
ops::PartialAllGatherOpCUDAKernel<int64_t>, plat::float16) {
ops::PartialAllGatherOpCUDAKernel<plat::float16>); }
...@@ -26,7 +26,7 @@ limitations under the License. */ ...@@ -26,7 +26,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PartialAllGatherOpCPUKernel : public framework::OpKernel<T> { class PartialAllGatherOpCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -129,9 +129,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_recv, ...@@ -129,9 +129,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_recv,
ops::PartialRecvOp, ops::PartialRecvOp,
ops::PartialRecvOpMaker); ops::PartialRecvOpMaker);
REGISTER_OP_CPU_KERNEL(partial_recv, PD_REGISTER_STRUCT_KERNEL(partial_recv,
ops::PartialRecvOpCPUKernel<float>, CPU,
ops::PartialRecvOpCPUKernel<double>, ALL_LAYOUT,
ops::PartialRecvOpCPUKernel<int>, ops::PartialRecvOpCPUKernel,
ops::PartialRecvOpCPUKernel<int64_t>, float,
ops::PartialRecvOpCPUKernel<plat::float16>); double,
int,
int64_t,
plat::float16) {}
...@@ -23,7 +23,7 @@ limitations under the License. */ ...@@ -23,7 +23,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PartialRecvOpCUDAKernel : public framework::OpKernel<T> { class PartialRecvOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -118,12 +118,16 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel<T> { ...@@ -118,12 +118,16 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(partial_recv, PD_REGISTER_STRUCT_KERNEL(partial_recv,
ops::PartialRecvOpCUDAKernel<float>, GPU,
ALL_LAYOUT,
ops::PartialRecvOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000 #if NCCL_VERSION_CODE >= 21000
ops::PartialRecvOpCUDAKernel<plat::bfloat16>, plat::bfloat16,
#endif #endif
ops::PartialRecvOpCUDAKernel<double>, int,
ops::PartialRecvOpCUDAKernel<int>, int64_t,
ops::PartialRecvOpCUDAKernel<int64_t>, plat::float16) {
ops::PartialRecvOpCUDAKernel<plat::float16>); }
...@@ -24,7 +24,7 @@ limitations under the License. */ ...@@ -24,7 +24,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PartialRecvOpCPUKernel : public framework::OpKernel<T> { class PartialRecvOpCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -94,9 +94,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_send, ...@@ -94,9 +94,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_send,
ops::PartialSendOp, ops::PartialSendOp,
ops::PartialSendMaker); ops::PartialSendMaker);
REGISTER_OP_CPU_KERNEL(partial_send, PD_REGISTER_STRUCT_KERNEL(partial_send,
ops::PartialSendOpCPUKernel<float>, CPU,
ops::PartialSendOpCPUKernel<double>, ALL_LAYOUT,
ops::PartialSendOpCPUKernel<int>, ops::PartialSendOpCPUKernel,
ops::PartialSendOpCPUKernel<int64_t>, float,
ops::PartialSendOpCPUKernel<plat::float16>); double,
int,
int64_t,
plat::float16) {}
...@@ -24,7 +24,7 @@ limitations under the License. */ ...@@ -24,7 +24,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PartialSendCUDAKernel : public framework::OpKernel<T> { class PartialSendCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -117,12 +117,16 @@ class PartialSendCUDAKernel : public framework::OpKernel<T> { ...@@ -117,12 +117,16 @@ class PartialSendCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(partial_send, PD_REGISTER_STRUCT_KERNEL(partial_send,
ops::PartialSendCUDAKernel<float>, GPU,
ops::PartialSendCUDAKernel<double>, ALL_LAYOUT,
ops::PartialSendCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000 #if NCCL_VERSION_CODE >= 21000
ops::PartialSendCUDAKernel<plat::bfloat16>, plat::bfloat16,
#endif #endif
ops::PartialSendCUDAKernel<int>, int,
ops::PartialSendCUDAKernel<int64_t>, int64_t,
ops::PartialSendCUDAKernel<plat::float16>); plat::float16) {
}
...@@ -25,7 +25,7 @@ limitations under the License. */ ...@@ -25,7 +25,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PartialSendOpCPUKernel : public framework::OpKernel<T> { class PartialSendOpCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -17,7 +17,7 @@ limitations under the License. */ ...@@ -17,7 +17,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> { class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -111,7 +111,10 @@ REGISTER_OPERATOR( ...@@ -111,7 +111,10 @@ REGISTER_OPERATOR(
ops::PolygonBoxTransformOpMaker, ops::PolygonBoxTransformOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
polygon_box_transform, PD_REGISTER_STRUCT_KERNEL(polygon_box_transform,
ops::PolygonBoxTransformCPUKernel<paddle::platform::CPUPlace, float>, CPU,
ops::PolygonBoxTransformCPUKernel<paddle::platform::CPUPlace, double>); ALL_LAYOUT,
ops::PolygonBoxTransformCPUKernel,
float,
double) {}
...@@ -38,7 +38,7 @@ __global__ void PolygonBoxTransformKernel( ...@@ -38,7 +38,7 @@ __global__ void PolygonBoxTransformKernel(
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> { class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -73,7 +73,10 @@ class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> { ...@@ -73,7 +73,10 @@ class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP_CUDA_KERNEL( namespace ops = paddle::operators;
polygon_box_transform, PD_REGISTER_STRUCT_KERNEL(polygon_box_transform,
paddle::operators::PolygonBoxTransformOpCUDAKernel<float>, GPU,
paddle::operators::PolygonBoxTransformOpCUDAKernel<double>); ALL_LAYOUT,
ops::PolygonBoxTransformOpCUDAKernel,
float,
double) {}
...@@ -242,7 +242,9 @@ REGISTER_OPERATOR( ...@@ -242,7 +242,9 @@ REGISTER_OPERATOR(
ops::PrecisionRecallOpMaker, ops::PrecisionRecallOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>, paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>); paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL( PD_REGISTER_STRUCT_KERNEL(precision_recall,
precision_recall, CPU,
ops::PrecisionRecallKernel<paddle::platform::CPUPlace, float>, ALL_LAYOUT,
ops::PrecisionRecallKernel<paddle::platform::CPUPlace, double>); ops::PrecisionRecallKernel,
float,
double) {}
...@@ -26,7 +26,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>; ...@@ -26,7 +26,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
enum StateVariable { TP = 0, FP, TN, FN }; enum StateVariable { TP = 0, FP, TN, FN };
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PrecisionRecallKernel : public framework::OpKernel<T> { class PrecisionRecallKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -52,7 +52,7 @@ static ncclRedOp_t str_to_nccl_red_type(std::string reduction) { ...@@ -52,7 +52,7 @@ static ncclRedOp_t str_to_nccl_red_type(std::string reduction) {
return it->second; return it->second;
} }
template <typename T> template <typename T, typename DeviceContext>
class NCCLAllReduceKernel : public framework::OpKernel<T> { class NCCLAllReduceKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -87,7 +87,7 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> { ...@@ -87,7 +87,7 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class NCCLReduceKernel : public framework::OpKernel<T> { class NCCLReduceKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -128,7 +128,7 @@ class NCCLReduceKernel : public framework::OpKernel<T> { ...@@ -128,7 +128,7 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class NCCLBcastKernel : public framework::OpKernel<T> { class NCCLBcastKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -172,6 +172,9 @@ class NCCLBcastKernel : public framework::OpKernel<T> { ...@@ -172,6 +172,9 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(ncclAllReduce, ops::NCCLAllReduceKernel<float>); PD_REGISTER_STRUCT_KERNEL(
REGISTER_OP_CUDA_KERNEL(ncclBcast, ops::NCCLBcastKernel<float>); ncclAllReduce, GPU, ALL_LAYOUT, ops::NCCLAllReduceKernel, float) {}
REGISTER_OP_CUDA_KERNEL(ncclReduce, ops::NCCLReduceKernel<float>); PD_REGISTER_STRUCT_KERNEL(
ncclBcast, GPU, ALL_LAYOUT, ops::NCCLBcastKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
ncclReduce, GPU, ALL_LAYOUT, ops::NCCLReduceKernel, float) {}
...@@ -31,9 +31,12 @@ limitations under the License. */ ...@@ -31,9 +31,12 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/place.h"
USE_NO_KERNEL_OP(ncclInit); USE_NO_KERNEL_OP(ncclInit);
USE_CUDA_ONLY_OP(ncclAllReduce); USE_OP_ITSELF(ncclAllReduce);
USE_CUDA_ONLY_OP(ncclReduce); USE_OP_ITSELF(ncclReduce);
USE_CUDA_ONLY_OP(ncclBcast); USE_OP_ITSELF(ncclBcast);
PD_DECLARE_KERNEL(ncclAllReduce, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(ncclReduce, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(ncclBcast, GPU, ALL_LAYOUT);
namespace f = paddle::framework; namespace f = paddle::framework;
namespace p = paddle::platform; namespace p = paddle::platform;
......
...@@ -320,9 +320,8 @@ REGISTER_OPERATOR(nce_grad, ...@@ -320,9 +320,8 @@ REGISTER_OPERATOR(nce_grad,
ops::NCEOpGrad, ops::NCEOpGrad,
ops::NCEOpGradVarTypeInference, ops::NCEOpGradVarTypeInference,
ops::NCEGradOpNoNeedBufferVarInferer); ops::NCEGradOpNoNeedBufferVarInferer);
REGISTER_OP_CPU_KERNEL(nce,
ops::NCEKernel<paddle::platform::CPUPlace, float>, PD_REGISTER_STRUCT_KERNEL(nce, CPU, ALL_LAYOUT, ops::NCEKernel, float, double) {
ops::NCEKernel<paddle::platform::CPUPlace, double>); }
REGISTER_OP_CPU_KERNEL(nce_grad, PD_REGISTER_STRUCT_KERNEL(
ops::NCEGradKernel<paddle::platform::CPUPlace, float>, nce_grad, CPU, ALL_LAYOUT, ops::NCEGradKernel, float, double) {}
ops::NCEGradKernel<paddle::platform::CPUPlace, double>);
...@@ -75,7 +75,7 @@ void PrepareSamples(const framework::ExecutionContext &context, ...@@ -75,7 +75,7 @@ void PrepareSamples(const framework::ExecutionContext &context,
} }
} }
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class NCEKernel : public framework::OpKernel<T> { class NCEKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &context) const override { void Compute(const framework::ExecutionContext &context) const override {
...@@ -245,7 +245,7 @@ class NCEKernel : public framework::OpKernel<T> { ...@@ -245,7 +245,7 @@ class NCEKernel : public framework::OpKernel<T> {
} }
}; };
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class NCEGradKernel : public framework::OpKernel<T> { class NCEGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &context) const override { void Compute(const framework::ExecutionContext &context) const override {
......
...@@ -45,7 +45,7 @@ establish the dependency between input and output tensors. ...@@ -45,7 +45,7 @@ establish the dependency between input and output tensors.
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class NopKernel : public framework::OpKernel<T> { class NopKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override {} void Compute(const framework::ExecutionContext& ctx) const override {}
...@@ -58,8 +58,8 @@ namespace ops = paddle::operators; ...@@ -58,8 +58,8 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(nop, ops::NopOp, ops::NopOpMaker); REGISTER_OP_WITHOUT_GRADIENT(nop, ops::NopOp, ops::NopOpMaker);
REGISTER_OP_CPU_KERNEL(nop, ops::NopKernel<float>); PD_REGISTER_STRUCT_KERNEL(nop, CPU, ALL_LAYOUT, ops::NopKernel, float) {}
REGISTER_OP_CUDA_KERNEL(nop, ops::NopKernel<float>); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_STRUCT_KERNEL(nop, GPU, ALL_LAYOUT, ops::NopKernel, float) {}
REGISTER_OP_NPU_KERNEL(nop, ops::NopKernel<float>); #endif
...@@ -58,10 +58,9 @@ class NumberCountOpMaker : public framework::OpProtoAndCheckerMaker { ...@@ -58,10 +58,9 @@ class NumberCountOpMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
REGISTER_OP_CPU_KERNEL(number_count,
ops::NumberCountOpCPUKernel<int>,
ops::NumberCountOpCPUKernel<int64_t>);
REGISTER_OP_WITHOUT_GRADIENT(number_count, REGISTER_OP_WITHOUT_GRADIENT(number_count,
ops::NumberCountOp, ops::NumberCountOp,
ops::NumberCountOpMaker); ops::NumberCountOpMaker);
PD_REGISTER_STRUCT_KERNEL(
number_count, CPU, ALL_LAYOUT, ops::NumberCountOpCPUKernel, int, int64_t) {}
...@@ -79,7 +79,7 @@ __global__ void NumberCount(const T* numbers, ...@@ -79,7 +79,7 @@ __global__ void NumberCount(const T* numbers,
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class NumberCountOpCUDAKernel : public framework::OpKernel<T> { class NumberCountOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -111,4 +111,5 @@ class NumberCountOpCUDAKernel : public framework::OpKernel<T> { ...@@ -111,4 +111,5 @@ class NumberCountOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(number_count, ops::NumberCountOpCUDAKernel<int64_t>); PD_REGISTER_STRUCT_KERNEL(
number_count, GPU, ALL_LAYOUT, ops::NumberCountOpCUDAKernel, int64_t) {}
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class NumberCountOpCPUKernel : public framework::OpKernel<T> { class NumberCountOpCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -133,5 +133,5 @@ namespace ops = paddle::operators; ...@@ -133,5 +133,5 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(proximal_adagrad, REGISTER_OP_WITHOUT_GRADIENT(proximal_adagrad,
ops::ProximalAdagradOp, ops::ProximalAdagradOp,
ops::ProximalAdagradOpMaker); ops::ProximalAdagradOpMaker);
REGISTER_OP_CPU_KERNEL(proximal_adagrad, PD_REGISTER_STRUCT_KERNEL(
ops::ProximalAdagradOpKernel<phi::CPUContext, float>); proximal_adagrad, CPU, ALL_LAYOUT, ops::ProximalAdagradOpKernel, float) {}
...@@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */ ...@@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */
#include "paddle/fluid/operators/optimizers/proximal_adagrad_op.h" #include "paddle/fluid/operators/optimizers/proximal_adagrad_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(proximal_adagrad, PD_REGISTER_STRUCT_KERNEL(
ops::ProximalAdagradOpKernel<phi::GPUContext, float>); proximal_adagrad, GPU, ALL_LAYOUT, ops::ProximalAdagradOpKernel, float) {}
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class ProximalAdagradOpKernel : public framework::OpKernel<T> { class ProximalAdagradOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -106,5 +106,6 @@ namespace ops = paddle::operators; ...@@ -106,5 +106,6 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(proximal_gd, REGISTER_OP_WITHOUT_GRADIENT(proximal_gd,
ops::ProximalGDOp, ops::ProximalGDOp,
ops::ProximalGDOpMaker); ops::ProximalGDOpMaker);
REGISTER_OP_CPU_KERNEL(proximal_gd,
ops::ProximalGDOpKernel<phi::CPUContext, float>); PD_REGISTER_STRUCT_KERNEL(
proximal_gd, CPU, ALL_LAYOUT, ops::ProximalGDOpKernel, float) {}
...@@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */ ...@@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */
#include "paddle/fluid/operators/optimizers/proximal_gd_op.h" #include "paddle/fluid/operators/optimizers/proximal_gd_op.h"
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(proximal_gd, PD_REGISTER_STRUCT_KERNEL(
ops::ProximalGDOpKernel<phi::GPUContext, float>); proximal_gd, GPU, ALL_LAYOUT, ops::ProximalGDOpKernel, float) {}
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class ProximalGDOpKernel : public framework::OpKernel<T> { class ProximalGDOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -402,7 +402,7 @@ static inline void GetPaddings(int* paddings, ...@@ -402,7 +402,7 @@ static inline void GetPaddings(int* paddings,
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class Pad2dCPUKernel : public framework::OpKernel<T> { class Pad2dCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -520,7 +520,7 @@ class Pad2dCPUKernel : public framework::OpKernel<T> { ...@@ -520,7 +520,7 @@ class Pad2dCPUKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class Pad2dGradCPUKernel : public framework::OpKernel<T> { class Pad2dGradCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -873,11 +873,8 @@ REGISTER_OPERATOR(pad2d, ...@@ -873,11 +873,8 @@ REGISTER_OPERATOR(pad2d,
REGISTER_OPERATOR(pad2d_grad, REGISTER_OPERATOR(pad2d_grad,
ops::Pad2dOpGrad, ops::Pad2dOpGrad,
ops::Pad2dOpGradNoNeedBufferVarsInferer); ops::Pad2dOpGradNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(pad2d,
ops::Pad2dCPUKernel<float>, PD_REGISTER_STRUCT_KERNEL(
ops::Pad2dCPUKernel<double>, pad2d, CPU, ALL_LAYOUT, ops::Pad2dCPUKernel, float, double, int, int64_t) {}
ops::Pad2dCPUKernel<int>, PD_REGISTER_STRUCT_KERNEL(
ops::Pad2dCPUKernel<int64_t>); pad2d_grad, CPU, ALL_LAYOUT, ops::Pad2dGradCPUKernel, float, double) {}
REGISTER_OP_CPU_KERNEL(pad2d_grad,
ops::Pad2dGradCPUKernel<float>,
ops::Pad2dGradCPUKernel<double>);
...@@ -361,7 +361,7 @@ static inline void GetPaddings(int* paddings, ...@@ -361,7 +361,7 @@ static inline void GetPaddings(int* paddings,
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class Pad2dCUDAKernel : public framework::OpKernel<T> { class Pad2dCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -489,7 +489,7 @@ class Pad2dCUDAKernel : public framework::OpKernel<T> { ...@@ -489,7 +489,7 @@ class Pad2dCUDAKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class Pad2dGradCUDAKernel : public framework::OpKernel<T> { class Pad2dGradCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -618,13 +618,19 @@ class Pad2dGradCUDAKernel : public framework::OpKernel<T> { ...@@ -618,13 +618,19 @@ class Pad2dGradCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(pad2d, PD_REGISTER_STRUCT_KERNEL(pad2d,
ops::Pad2dCUDAKernel<plat::float16>, GPU,
ops::Pad2dCUDAKernel<float>, ALL_LAYOUT,
ops::Pad2dCUDAKernel<double>, ops::Pad2dCUDAKernel,
ops::Pad2dCUDAKernel<int>, float,
ops::Pad2dCUDAKernel<int64_t>); double,
REGISTER_OP_CUDA_KERNEL(pad2d_grad, int,
ops::Pad2dGradCUDAKernel<plat::float16>, int64_t,
ops::Pad2dGradCUDAKernel<float>, plat::float16) {}
ops::Pad2dGradCUDAKernel<double>); PD_REGISTER_STRUCT_KERNEL(pad2d_grad,
GPU,
ALL_LAYOUT,
ops::Pad2dGradCUDAKernel,
float,
double,
plat::float16) {}
...@@ -243,26 +243,38 @@ REGISTER_OPERATOR(pad_constant_like, ...@@ -243,26 +243,38 @@ REGISTER_OPERATOR(pad_constant_like,
ops::PadConstantLikeOpGradMaker<paddle::imperative::OpBase>); ops::PadConstantLikeOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(pad_constant_like_grad, ops::PadConstantLikeOpGrad); REGISTER_OPERATOR(pad_constant_like_grad, ops::PadConstantLikeOpGrad);
REGISTER_OP_CPU_KERNEL(pad_constant_like, PD_REGISTER_STRUCT_KERNEL(pad_constant_like,
ops::PadConstantLikeKernel<phi::CPUContext, float>, CPU,
ops::PadConstantLikeKernel<phi::CPUContext, double>, ALL_LAYOUT,
ops::PadConstantLikeKernel<phi::CPUContext, int>, ops::PadConstantLikeKernel,
ops::PadConstantLikeKernel<phi::CPUContext, int64_t>); float,
REGISTER_OP_CPU_KERNEL( double,
pad_constant_like_grad, int,
ops::PadConstantLikeGradKernel<phi::CPUContext, float>, int64_t) {}
ops::PadConstantLikeGradKernel<phi::CPUContext, double>, PD_REGISTER_STRUCT_KERNEL(pad_constant_like_grad,
ops::PadConstantLikeGradKernel<phi::CPUContext, int>, CPU,
ops::PadConstantLikeGradKernel<phi::CPUContext, int64_t>); ALL_LAYOUT,
ops::PadConstantLikeGradKernel,
REGISTER_OP_CUDA_KERNEL(pad_constant_like, float,
ops::PadConstantLikeKernel<phi::GPUContext, float>, double,
ops::PadConstantLikeKernel<phi::GPUContext, double>, int,
ops::PadConstantLikeKernel<phi::GPUContext, int>, int64_t) {}
ops::PadConstantLikeKernel<phi::GPUContext, int64_t>);
REGISTER_OP_CUDA_KERNEL( #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
pad_constant_like_grad, PD_REGISTER_STRUCT_KERNEL(pad_constant_like,
ops::PadConstantLikeGradKernel<phi::GPUContext, int>, GPU,
ops::PadConstantLikeGradKernel<phi::GPUContext, int64_t>, ALL_LAYOUT,
ops::PadConstantLikeGradKernel<phi::GPUContext, float>, ops::PadConstantLikeKernel,
ops::PadConstantLikeGradKernel<phi::GPUContext, double>); float,
double,
int,
int64_t) {}
PD_REGISTER_STRUCT_KERNEL(pad_constant_like_grad,
GPU,
ALL_LAYOUT,
ops::PadConstantLikeGradKernel,
float,
double,
int,
int64_t) {}
#endif
...@@ -26,7 +26,7 @@ limitations under the License. */ ...@@ -26,7 +26,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PadConstantLikeKernel : public framework::OpKernel<T> { class PadConstantLikeKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -61,7 +61,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> { ...@@ -61,7 +61,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> {
} }
}; };
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PadConstantLikeGradKernel : public framework::OpKernel<T> { class PadConstantLikeGradKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
......
...@@ -202,14 +202,19 @@ REGISTER_OPERATOR(partial_concat, ...@@ -202,14 +202,19 @@ REGISTER_OPERATOR(partial_concat,
REGISTER_OPERATOR(partial_concat_grad, ops::PartialConcatGradOp); REGISTER_OPERATOR(partial_concat_grad, ops::PartialConcatGradOp);
REGISTER_OP_CPU_KERNEL(partial_concat, PD_REGISTER_STRUCT_KERNEL(partial_concat,
ops::PartialConcatKernel<phi::CPUContext, double>, CPU,
ops::PartialConcatKernel<phi::CPUContext, float>, ALL_LAYOUT,
ops::PartialConcatKernel<phi::CPUContext, int64_t>, ops::PartialConcatKernel,
ops::PartialConcatKernel<phi::CPUContext, int>); float,
double,
REGISTER_OP_CPU_KERNEL(partial_concat_grad, int,
ops::PartialConcatGradientOpKernel<float>, int64_t) {}
ops::PartialConcatGradientOpKernel<int>, PD_REGISTER_STRUCT_KERNEL(partial_concat_grad,
ops::PartialConcatGradientOpKernel<double>, CPU,
ops::PartialConcatGradientOpKernel<int64_t>); ALL_LAYOUT,
ops::PartialConcatGradientOpKernel,
float,
double,
int,
int64_t) {}
...@@ -65,7 +65,7 @@ __global__ void ConcatPartialGradCUDAKernel(T **in, ...@@ -65,7 +65,7 @@ __global__ void ConcatPartialGradCUDAKernel(T **in,
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class PartialConcatOpCUDAKernel : public framework::OpKernel<T> { class PartialConcatOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -146,7 +146,7 @@ class PartialConcatOpCUDAKernel : public framework::OpKernel<T> { ...@@ -146,7 +146,7 @@ class PartialConcatOpCUDAKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PartialConcatGradOpCUDAKernel : public framework::OpKernel<T> { class PartialConcatGradOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -231,16 +231,22 @@ class PartialConcatGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -231,16 +231,22 @@ class PartialConcatGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(partial_concat,
ops::PartialConcatOpCUDAKernel<float>, PD_REGISTER_STRUCT_KERNEL(partial_concat,
ops::PartialConcatOpCUDAKernel<double>, GPU,
ops::PartialConcatOpCUDAKernel<int>, ALL_LAYOUT,
ops::PartialConcatOpCUDAKernel<int64_t>, ops::PartialConcatOpCUDAKernel,
ops::PartialConcatOpCUDAKernel<plat::float16>); float,
double,
REGISTER_OP_CUDA_KERNEL(partial_concat_grad, int,
ops::PartialConcatGradOpCUDAKernel<float>, int64_t,
ops::PartialConcatGradOpCUDAKernel<double>, plat::float16) {}
ops::PartialConcatGradOpCUDAKernel<int>, PD_REGISTER_STRUCT_KERNEL(partial_concat_grad,
ops::PartialConcatGradOpCUDAKernel<int64_t>, GPU,
ops::PartialConcatGradOpCUDAKernel<plat::float16>); ALL_LAYOUT,
ops::PartialConcatGradOpCUDAKernel,
float,
double,
int,
int64_t,
plat::float16) {}
...@@ -39,7 +39,7 @@ static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) { ...@@ -39,7 +39,7 @@ static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) {
return start_index; return start_index;
} }
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PartialConcatKernel : public framework::OpKernel<T> { class PartialConcatKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -84,7 +84,7 @@ class PartialConcatKernel : public framework::OpKernel<T> { ...@@ -84,7 +84,7 @@ class PartialConcatKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PartialConcatGradientOpKernel : public framework::OpKernel<T> { class PartialConcatGradientOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -204,14 +204,19 @@ REGISTER_OPERATOR(partial_sum, ...@@ -204,14 +204,19 @@ REGISTER_OPERATOR(partial_sum,
REGISTER_OPERATOR(partial_sum_grad, ops::PartialSumGradOp); REGISTER_OPERATOR(partial_sum_grad, ops::PartialSumGradOp);
REGISTER_OP_CPU_KERNEL(partial_sum, PD_REGISTER_STRUCT_KERNEL(partial_sum,
ops::PartialSumKernel<phi::CPUContext, float>, CPU,
ops::PartialSumKernel<phi::CPUContext, int>, ALL_LAYOUT,
ops::PartialSumKernel<phi::CPUContext, double>, ops::PartialSumKernel,
ops::PartialSumKernel<phi::CPUContext, int64_t>); float,
double,
REGISTER_OP_CPU_KERNEL(partial_sum_grad, int,
ops::PartialSumGradientOpKernel<float>, int64_t) {}
ops::PartialSumGradientOpKernel<int>, PD_REGISTER_STRUCT_KERNEL(partial_sum_grad,
ops::PartialSumGradientOpKernel<double>, CPU,
ops::PartialSumGradientOpKernel<int64_t>); ALL_LAYOUT,
ops::PartialSumGradientOpKernel,
float,
double,
int,
int64_t) {}
...@@ -70,7 +70,7 @@ __global__ void PartialSumGradCUDAKernel(T **res_grad, ...@@ -70,7 +70,7 @@ __global__ void PartialSumGradCUDAKernel(T **res_grad,
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class PartialSumOpCUDAKernel : public framework::OpKernel<T> { class PartialSumOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -144,7 +144,7 @@ class PartialSumOpCUDAKernel : public framework::OpKernel<T> { ...@@ -144,7 +144,7 @@ class PartialSumOpCUDAKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PartialSumGradOpCUDAKernel : public framework::OpKernel<T> { class PartialSumGradOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -233,18 +233,3 @@ class PartialSumGradOpCUDAKernel : public framework::OpKernel<T> { ...@@ -233,18 +233,3 @@ class PartialSumGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(partial_sum,
ops::PartialSumOpCUDAKernel<float>,
ops::PartialSumOpCUDAKernel<double>,
ops::PartialSumOpCUDAKernel<int>,
ops::PartialSumOpCUDAKernel<int64_t>,
ops::PartialSumOpCUDAKernel<plat::float16>);
REGISTER_OP_CUDA_KERNEL(partial_sum_grad,
ops::PartialSumGradOpCUDAKernel<float>,
ops::PartialSumGradOpCUDAKernel<double>,
ops::PartialSumGradOpCUDAKernel<int>,
ops::PartialSumGradOpCUDAKernel<int64_t>,
ops::PartialSumGradOpCUDAKernel<plat::float16>);
...@@ -21,7 +21,7 @@ limitations under the License. */ ...@@ -21,7 +21,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PartialSumKernel : public framework::OpKernel<T> { class PartialSumKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -57,7 +57,7 @@ class PartialSumKernel : public framework::OpKernel<T> { ...@@ -57,7 +57,7 @@ class PartialSumKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PartialSumGradientOpKernel : public framework::OpKernel<T> { class PartialSumGradientOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -253,7 +253,10 @@ namespace ops = paddle::operators; ...@@ -253,7 +253,10 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair, REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair,
ops::PositiveNegativePairOp, ops::PositiveNegativePairOp,
ops::PositiveNegativePairOpMaker); ops::PositiveNegativePairOpMaker);
REGISTER_OP_CPU_KERNEL(
positive_negative_pair, PD_REGISTER_STRUCT_KERNEL(positive_negative_pair,
ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, float>, CPU,
ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, double>); ALL_LAYOUT,
ops::PositiveNegativePairKernel,
float,
double) {}
...@@ -19,7 +19,7 @@ limitations under the License. */ ...@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PositiveNegativePairKernel : public framework::OpKernel<T> { class PositiveNegativePairKernel : public framework::OpKernel<T> {
public: public:
struct PredictionResult { struct PredictionResult {
......
...@@ -195,13 +195,20 @@ REGISTER_OPERATOR(prroi_pool, ...@@ -195,13 +195,20 @@ REGISTER_OPERATOR(prroi_pool,
ops::PRROIPoolGradMaker<paddle::framework::OpDesc>, ops::PRROIPoolGradMaker<paddle::framework::OpDesc>,
ops::PRROIPoolGradMaker<paddle::imperative::OpBase>); ops::PRROIPoolGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(prroi_pool_grad, ops::PRROIPoolGradOp); REGISTER_OPERATOR(prroi_pool_grad, ops::PRROIPoolGradOp);
REGISTER_OP_CPU_KERNEL(prroi_pool,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, float>, PD_REGISTER_STRUCT_KERNEL(prroi_pool,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, double>, CPU,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, int>, ALL_LAYOUT,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, int64_t>); ops::CPUPRROIPoolOpKernel,
REGISTER_OP_CPU_KERNEL(prroi_pool_grad, float,
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, float>, double,
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, double>, int,
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, int>, int64_t) {}
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, int64_t>); PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad,
CPU,
ALL_LAYOUT,
ops::CPUPRROIPoolGradOpKernel,
float,
double,
int,
int64_t) {}
...@@ -211,7 +211,7 @@ __global__ void GPUPRROIPoolBackward(const int nthreads, ...@@ -211,7 +211,7 @@ __global__ void GPUPRROIPoolBackward(const int nthreads,
} }
} }
template <typename T> template <typename T, typename DeviceContext>
class GPUPRROIPoolOpKernel : public framework::OpKernel<T> { class GPUPRROIPoolOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -314,7 +314,7 @@ class GPUPRROIPoolOpKernel : public framework::OpKernel<T> { ...@@ -314,7 +314,7 @@ class GPUPRROIPoolOpKernel : public framework::OpKernel<T> {
} }
}; };
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class GPUPRROIPoolGradOpKernel : public framework::OpKernel<T> { class GPUPRROIPoolGradOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -428,9 +428,12 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel<T> { ...@@ -428,9 +428,12 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(prroi_pool,
ops::GPUPRROIPoolOpKernel<float>, PD_REGISTER_STRUCT_KERNEL(
ops::GPUPRROIPoolOpKernel<double>); prroi_pool, GPU, ALL_LAYOUT, ops::GPUPRROIPoolOpKernel, float, double) {}
REGISTER_OP_CUDA_KERNEL(prroi_pool_grad, PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad,
ops::GPUPRROIPoolGradOpKernel<phi::GPUContext, float>, GPU,
ops::GPUPRROIPoolGradOpKernel<phi::GPUContext, double>); ALL_LAYOUT,
ops::GPUPRROIPoolGradOpKernel,
float,
double) {}
...@@ -327,7 +327,7 @@ inline HOSTDEVICE void PrRoIPoolingCoorBackward(int s_w, ...@@ -327,7 +327,7 @@ inline HOSTDEVICE void PrRoIPoolingCoorBackward(int s_w,
(*this_out_grad)); (*this_out_grad));
} }
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class CPUPRROIPoolOpKernel : public framework::OpKernel<T> { class CPUPRROIPoolOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -481,7 +481,7 @@ class CPUPRROIPoolOpKernel : public framework::OpKernel<T> { ...@@ -481,7 +481,7 @@ class CPUPRROIPoolOpKernel : public framework::OpKernel<T> {
} }
}; };
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class CPUPRROIPoolGradOpKernel : public framework::OpKernel<T> { class CPUPRROIPoolGradOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -126,7 +126,9 @@ REGISTER_OP_WITHOUT_GRADIENT(prune_gate_by_capacity, ...@@ -126,7 +126,9 @@ REGISTER_OP_WITHOUT_GRADIENT(prune_gate_by_capacity,
ops::PruneGateByCapacityOp, ops::PruneGateByCapacityOp,
ops::PruneGateByCapacityOpMaker); ops::PruneGateByCapacityOpMaker);
REGISTER_OP_CPU_KERNEL( PD_REGISTER_STRUCT_KERNEL(prune_gate_by_capacity,
prune_gate_by_capacity, CPU,
ops::PruneGateByCapacityCPUKernel<phi::CPUContext, int>, ALL_LAYOUT,
ops::PruneGateByCapacityCPUKernel<phi::CPUContext, int64_t>); ops::PruneGateByCapacityCPUKernel,
int,
int64_t) {}
...@@ -105,7 +105,7 @@ static void VisitDataType(phi::DataType type, Visitor visitor) { ...@@ -105,7 +105,7 @@ static void VisitDataType(phi::DataType type, Visitor visitor) {
} }
} }
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PruneGateByCapacityCUDAKernel : public framework::OpKernel<T> { class PruneGateByCapacityCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
...@@ -127,6 +127,8 @@ class PruneGateByCapacityCUDAKernel : public framework::OpKernel<T> { ...@@ -127,6 +127,8 @@ class PruneGateByCapacityCUDAKernel : public framework::OpKernel<T> {
} // namespace operators } // namespace operators
} // namespace paddle } // namespace paddle
REGISTER_OP_CUDA_KERNEL( PD_REGISTER_STRUCT_KERNEL(prune_gate_by_capacity,
prune_gate_by_capacity, GPU,
ops::PruneGateByCapacityCUDAKernel<phi::GPUContext, int64_t>); ALL_LAYOUT,
ops::PruneGateByCapacityCUDAKernel,
int64_t) {}
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename DeviceContext, typename T> template <typename T, typename DeviceContext>
class PruneGateByCapacityCPUKernel : public framework::OpKernel<T> { class PruneGateByCapacityCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
......
...@@ -151,10 +151,15 @@ REGISTER_OPERATOR( ...@@ -151,10 +151,15 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR(push_box_extended_sparse, ops::PushBoxExtendedSparseOp); REGISTER_OPERATOR(push_box_extended_sparse, ops::PushBoxExtendedSparseOp);
REGISTER_OP_CPU_KERNEL(pull_box_extended_sparse, PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse,
ops::PullBoxExtendedSparseCPUKernel<float>, CPU,
ops::PullBoxExtendedSparseCPUKernel<double>); ALL_LAYOUT,
ops::PullBoxExtendedSparseCPUKernel,
REGISTER_OP_CPU_KERNEL(push_box_extended_sparse, float,
ops::PushBoxExtendedSparseCPUKernel<float>, double) {}
ops::PushBoxExtendedSparseCPUKernel<double>); PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse,
CPU,
ALL_LAYOUT,
ops::PushBoxExtendedSparseCPUKernel,
float,
double) {}
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> { class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -27,7 +27,7 @@ class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> { ...@@ -27,7 +27,7 @@ class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> { class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -38,9 +38,16 @@ class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> { ...@@ -38,9 +38,16 @@ class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(pull_box_extended_sparse,
ops::PullBoxExtendedSparseCUDAKernel<float>, PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse,
ops::PullBoxExtendedSparseCUDAKernel<double>); GPU,
REGISTER_OP_CUDA_KERNEL(push_box_extended_sparse, ALL_LAYOUT,
ops::PushBoxExtendedSparseCUDAKernel<float>, ops::PullBoxExtendedSparseCUDAKernel,
ops::PushBoxExtendedSparseCUDAKernel<double>); float,
double) {}
PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse,
GPU,
ALL_LAYOUT,
ops::PushBoxExtendedSparseCUDAKernel,
float,
double) {}
...@@ -108,7 +108,7 @@ static void PushBoxExtendedSparseFunctor( ...@@ -108,7 +108,7 @@ static void PushBoxExtendedSparseFunctor(
#endif #endif
} }
template <typename T> template <typename T, typename DeviceContext>
class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> { class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -116,7 +116,7 @@ class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> { ...@@ -116,7 +116,7 @@ class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PushBoxExtendedSparseCPUKernel : public framework::OpKernel<T> { class PushBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
......
...@@ -135,5 +135,8 @@ REGISTER_OPERATOR(pull_box_sparse, ...@@ -135,5 +135,8 @@ REGISTER_OPERATOR(pull_box_sparse,
ops::PushBoxSparseOpMaker<paddle::framework::OpDesc>, ops::PushBoxSparseOpMaker<paddle::framework::OpDesc>,
ops::PushBoxSparseOpMaker<paddle::imperative::OpBase>); ops::PushBoxSparseOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_box_sparse, ops::PushBoxSparseOp); REGISTER_OPERATOR(push_box_sparse, ops::PushBoxSparseOp);
REGISTER_OP_CPU_KERNEL(pull_box_sparse, ops::PullBoxSparseKernel<float>);
REGISTER_OP_CPU_KERNEL(push_box_sparse, ops::PushBoxSparseKernel<float>); PD_REGISTER_STRUCT_KERNEL(
pull_box_sparse, CPU, ALL_LAYOUT, ops::PullBoxSparseKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
push_box_sparse, CPU, ALL_LAYOUT, ops::PushBoxSparseKernel, float) {}
...@@ -113,7 +113,7 @@ static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) { ...@@ -113,7 +113,7 @@ static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) {
#endif #endif
} }
template <typename T> template <typename T, typename DeviceContext>
class PullBoxSparseKernel : public framework::OpKernel<T> { class PullBoxSparseKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -121,7 +121,7 @@ class PullBoxSparseKernel : public framework::OpKernel<T> { ...@@ -121,7 +121,7 @@ class PullBoxSparseKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PushBoxSparseKernel : public framework::OpKernel<T> { class PushBoxSparseKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
......
...@@ -45,16 +45,7 @@ limitations under the License. */ ...@@ -45,16 +45,7 @@ limitations under the License. */
namespace ops = paddle::operators; namespace ops = paddle::operators;
namespace plat = paddle::platform; namespace plat = paddle::platform;
#ifdef PADDLE_WITH_XPU_KP PD_REGISTER_STRUCT_KERNEL(
REGISTER_OP_KERNEL(pull_box_sparse, pull_box_sparse, KPS, ALL_LAYOUT, ops::PullBoxSparseKernel, float) {}
KP, PD_REGISTER_STRUCT_KERNEL(
plat::XPUPlace, push_box_sparse, KPS, ALL_LAYOUT, ops::PushBoxSparseKernel, float) {}
ops::PullBoxSparseKernel<float>);
REGISTER_OP_KERNEL(push_box_sparse,
KP,
plat::XPUPlace,
ops::PushBoxSparseKernel<float>);
#else
REGISTER_OP_CUDA_KERNEL(pull_box_sparse, ops::PullBoxSparseKernel<float>);
REGISTER_OP_CUDA_KERNEL(push_box_sparse, ops::PushBoxSparseKernel<float>);
#endif
...@@ -145,9 +145,16 @@ REGISTER_OPERATOR(pull_gpups_sparse, ...@@ -145,9 +145,16 @@ REGISTER_OPERATOR(pull_gpups_sparse,
ops::PushGpuPSSparseOpMaker<paddle::framework::OpDesc>, ops::PushGpuPSSparseOpMaker<paddle::framework::OpDesc>,
ops::PushGpuPSSparseOpMaker<paddle::imperative::OpBase>); ops::PushGpuPSSparseOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_gpups_sparse, ops::PushGpuPSSparseOp); REGISTER_OPERATOR(push_gpups_sparse, ops::PushGpuPSSparseOp);
REGISTER_OP_CPU_KERNEL(pull_gpups_sparse,
ops::PullGpuPSSparseCPUKernel<float>, PD_REGISTER_STRUCT_KERNEL(pull_gpups_sparse,
ops::PullGpuPSSparseCPUKernel<double>) CPU,
REGISTER_OP_CPU_KERNEL(push_gpups_sparse, ALL_LAYOUT,
ops::PushGpuPSSparseCPUKernel<float>, ops::PullGpuPSSparseCPUKernel,
ops::PushGpuPSSparseCPUKernel<double>) float,
double) {}
PD_REGISTER_STRUCT_KERNEL(push_gpups_sparse,
CPU,
ALL_LAYOUT,
ops::PushGpuPSSparseCPUKernel,
float,
double) {}
...@@ -20,7 +20,7 @@ namespace paddle { ...@@ -20,7 +20,7 @@ namespace paddle {
namespace operators { namespace operators {
using phi::PADDLE_CUDA_NUM_THREADS; using phi::PADDLE_CUDA_NUM_THREADS;
template <typename T> template <typename T, typename DeviceContext>
class PullGpuPSSparseCUDAKernel : public framework::OpKernel<T> { class PullGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -28,7 +28,7 @@ class PullGpuPSSparseCUDAKernel : public framework::OpKernel<T> { ...@@ -28,7 +28,7 @@ class PullGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PushGpuPSSparseCUDAKernel : public framework::OpKernel<T> { class PushGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -39,9 +39,15 @@ class PushGpuPSSparseCUDAKernel : public framework::OpKernel<T> { ...@@ -39,9 +39,15 @@ class PushGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle } // namespace paddle
namespace ops = paddle::operators; namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(pull_gpups_sparse, PD_REGISTER_STRUCT_KERNEL(pull_gpups_sparse,
ops::PullGpuPSSparseCUDAKernel<float>, GPU,
ops::PullGpuPSSparseCUDAKernel<double>) ALL_LAYOUT,
REGISTER_OP_CUDA_KERNEL(push_gpups_sparse, ops::PullGpuPSSparseCUDAKernel,
ops::PushGpuPSSparseCUDAKernel<float>, float,
ops::PushGpuPSSparseCUDAKernel<double>) double) {}
PD_REGISTER_STRUCT_KERNEL(push_gpups_sparse,
GPU,
ALL_LAYOUT,
ops::PushGpuPSSparseCUDAKernel,
float,
double) {}
...@@ -97,7 +97,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) { ...@@ -97,7 +97,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) {
#endif #endif
} }
template <typename T> template <typename T, typename DeviceContext>
class PullGpuPSSparseCPUKernel : public framework::OpKernel<T> { class PullGpuPSSparseCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
...@@ -105,7 +105,7 @@ class PullGpuPSSparseCPUKernel : public framework::OpKernel<T> { ...@@ -105,7 +105,7 @@ class PullGpuPSSparseCPUKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PushGpuPSSparseCPUKernel : public framework::OpKernel<T> { class PushGpuPSSparseCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext &ctx) const override { void Compute(const framework::ExecutionContext &ctx) const override {
......
...@@ -143,5 +143,7 @@ REGISTER_OPERATOR(pull_sparse, ...@@ -143,5 +143,7 @@ REGISTER_OPERATOR(pull_sparse,
ops::PushSparseOpMaker<paddle::framework::OpDesc>, ops::PushSparseOpMaker<paddle::framework::OpDesc>,
ops::PushSparseOpMaker<paddle::imperative::OpBase>); ops::PushSparseOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_sparse, ops::PushSparseOp); REGISTER_OPERATOR(push_sparse, ops::PushSparseOp);
REGISTER_OP_CPU_KERNEL(pull_sparse, ops::PullSparseCPUKernel<float>) PD_REGISTER_STRUCT_KERNEL(
REGISTER_OP_CPU_KERNEL(push_sparse, ops::PushSparseCPUKernel<float>) pull_sparse, CPU, ALL_LAYOUT, ops::PullSparseCPUKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
push_sparse, CPU, ALL_LAYOUT, ops::PushSparseCPUKernel, float) {}
...@@ -66,7 +66,7 @@ void PushSparseFunctor(const framework::ExecutionContext& ctx) { ...@@ -66,7 +66,7 @@ void PushSparseFunctor(const framework::ExecutionContext& ctx) {
&grads); &grads);
} }
template <typename T> template <typename T, typename DeviceContext>
class PullSparseCPUKernel : public framework::OpKernel<T> { class PullSparseCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -74,7 +74,7 @@ class PullSparseCPUKernel : public framework::OpKernel<T> { ...@@ -74,7 +74,7 @@ class PullSparseCPUKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PushSparseCPUKernel : public framework::OpKernel<T> { class PushSparseCPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -135,5 +135,7 @@ REGISTER_OPERATOR(pull_sparse_v2, ...@@ -135,5 +135,7 @@ REGISTER_OPERATOR(pull_sparse_v2,
ops::PushSparseV2OpMaker<paddle::framework::OpDesc>, ops::PushSparseV2OpMaker<paddle::framework::OpDesc>,
ops::PushSparseV2OpMaker<paddle::imperative::OpBase>); ops::PushSparseV2OpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_sparse_v2, ops::PushSparseV2Op); REGISTER_OPERATOR(push_sparse_v2, ops::PushSparseV2Op);
REGISTER_OP_CPU_KERNEL(pull_sparse_v2, ops::PullSparseV2CPUKernel<float>) PD_REGISTER_STRUCT_KERNEL(
REGISTER_OP_CPU_KERNEL(push_sparse_v2, ops::PushSparseV2CPUKernel<float>) pull_sparse_v2, CPU, ALL_LAYOUT, ops::PullSparseV2CPUKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
push_sparse_v2, CPU, ALL_LAYOUT, ops::PushSparseV2CPUKernel, float) {}
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
namespace paddle { namespace paddle {
namespace operators { namespace operators {
template <typename T> template <typename T, typename DeviceContext>
class PullSparseV2CPUKernel : public framework::OpKernel<T> { class PullSparseV2CPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
...@@ -33,7 +33,7 @@ class PullSparseV2CPUKernel : public framework::OpKernel<T> { ...@@ -33,7 +33,7 @@ class PullSparseV2CPUKernel : public framework::OpKernel<T> {
} }
}; };
template <typename T> template <typename T, typename DeviceContext>
class PushSparseV2CPUKernel : public framework::OpKernel<T> { class PushSparseV2CPUKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
......
...@@ -202,7 +202,6 @@ register_unity_group( ...@@ -202,7 +202,6 @@ register_unity_group(
pad_op.cc) pad_op.cc)
register_unity_group( register_unity_group(
cc cc
modified_huber_loss_op.cc
partial_sum_op.cc partial_sum_op.cc
pixel_shuffle_op.cc pixel_shuffle_op.cc
pool_op.cc pool_op.cc
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册