未验证 提交 aa35331f 编写于 作者: H huangjiyi 提交者: GitHub

register fluid kernels to phi [part 7] (#52577)

* update

* fix bug

* fix ci-windows-openblas

* fix test_partial_sum_op

* fix codestyle
上级 6913feb0
......@@ -85,9 +85,12 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
ops::PartialAllGatherOpInplaceInferer)
REGISTER_OP_CPU_KERNEL(partial_allgather,
ops::PartialAllGatherOpCPUKernel<float>,
ops::PartialAllGatherOpCPUKernel<double>,
ops::PartialAllGatherOpCPUKernel<int>,
ops::PartialAllGatherOpCPUKernel<int64_t>,
ops::PartialAllGatherOpCPUKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(partial_allgather,
CPU,
ALL_LAYOUT,
ops::PartialAllGatherOpCPUKernel,
float,
double,
int,
int64_t,
plat::float16) {}
......@@ -23,7 +23,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PartialAllGatherOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -102,12 +102,16 @@ class PartialAllGatherOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(partial_allgather,
ops::PartialAllGatherOpCUDAKernel<float>,
PD_REGISTER_STRUCT_KERNEL(partial_allgather,
GPU,
ALL_LAYOUT,
ops::PartialAllGatherOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
ops::PartialAllGatherOpCUDAKernel<plat::bfloat16>,
plat::bfloat16,
#endif
ops::PartialAllGatherOpCUDAKernel<double>,
ops::PartialAllGatherOpCUDAKernel<int>,
ops::PartialAllGatherOpCUDAKernel<int64_t>,
ops::PartialAllGatherOpCUDAKernel<plat::float16>);
int,
int64_t,
plat::float16) {
}
......@@ -26,7 +26,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PartialAllGatherOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -129,9 +129,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_recv,
ops::PartialRecvOp,
ops::PartialRecvOpMaker);
REGISTER_OP_CPU_KERNEL(partial_recv,
ops::PartialRecvOpCPUKernel<float>,
ops::PartialRecvOpCPUKernel<double>,
ops::PartialRecvOpCPUKernel<int>,
ops::PartialRecvOpCPUKernel<int64_t>,
ops::PartialRecvOpCPUKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(partial_recv,
CPU,
ALL_LAYOUT,
ops::PartialRecvOpCPUKernel,
float,
double,
int,
int64_t,
plat::float16) {}
......@@ -23,7 +23,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PartialRecvOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -118,12 +118,16 @@ class PartialRecvOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(partial_recv,
ops::PartialRecvOpCUDAKernel<float>,
PD_REGISTER_STRUCT_KERNEL(partial_recv,
GPU,
ALL_LAYOUT,
ops::PartialRecvOpCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
ops::PartialRecvOpCUDAKernel<plat::bfloat16>,
plat::bfloat16,
#endif
ops::PartialRecvOpCUDAKernel<double>,
ops::PartialRecvOpCUDAKernel<int>,
ops::PartialRecvOpCUDAKernel<int64_t>,
ops::PartialRecvOpCUDAKernel<plat::float16>);
int,
int64_t,
plat::float16) {
}
......@@ -24,7 +24,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PartialRecvOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -94,9 +94,12 @@ REGISTER_OP_WITHOUT_GRADIENT(partial_send,
ops::PartialSendOp,
ops::PartialSendMaker);
REGISTER_OP_CPU_KERNEL(partial_send,
ops::PartialSendOpCPUKernel<float>,
ops::PartialSendOpCPUKernel<double>,
ops::PartialSendOpCPUKernel<int>,
ops::PartialSendOpCPUKernel<int64_t>,
ops::PartialSendOpCPUKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(partial_send,
CPU,
ALL_LAYOUT,
ops::PartialSendOpCPUKernel,
float,
double,
int,
int64_t,
plat::float16) {}
......@@ -24,7 +24,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PartialSendCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -117,12 +117,16 @@ class PartialSendCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(partial_send,
ops::PartialSendCUDAKernel<float>,
ops::PartialSendCUDAKernel<double>,
PD_REGISTER_STRUCT_KERNEL(partial_send,
GPU,
ALL_LAYOUT,
ops::PartialSendCUDAKernel,
float,
double,
#if NCCL_VERSION_CODE >= 21000
ops::PartialSendCUDAKernel<plat::bfloat16>,
plat::bfloat16,
#endif
ops::PartialSendCUDAKernel<int>,
ops::PartialSendCUDAKernel<int64_t>,
ops::PartialSendCUDAKernel<plat::float16>);
int,
int64_t,
plat::float16) {
}
......@@ -25,7 +25,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PartialSendOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -17,7 +17,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PolygonBoxTransformCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -111,7 +111,10 @@ REGISTER_OPERATOR(
ops::PolygonBoxTransformOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
polygon_box_transform,
ops::PolygonBoxTransformCPUKernel<paddle::platform::CPUPlace, float>,
ops::PolygonBoxTransformCPUKernel<paddle::platform::CPUPlace, double>);
PD_REGISTER_STRUCT_KERNEL(polygon_box_transform,
CPU,
ALL_LAYOUT,
ops::PolygonBoxTransformCPUKernel,
float,
double) {}
......@@ -38,7 +38,7 @@ __global__ void PolygonBoxTransformKernel(
}
}
template <typename T>
template <typename T, typename DeviceContext>
class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -73,7 +73,10 @@ class PolygonBoxTransformOpCUDAKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle
REGISTER_OP_CUDA_KERNEL(
polygon_box_transform,
paddle::operators::PolygonBoxTransformOpCUDAKernel<float>,
paddle::operators::PolygonBoxTransformOpCUDAKernel<double>);
namespace ops = paddle::operators;
PD_REGISTER_STRUCT_KERNEL(polygon_box_transform,
GPU,
ALL_LAYOUT,
ops::PolygonBoxTransformOpCUDAKernel,
float,
double) {}
......@@ -242,7 +242,9 @@ REGISTER_OPERATOR(
ops::PrecisionRecallOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
precision_recall,
ops::PrecisionRecallKernel<paddle::platform::CPUPlace, float>,
ops::PrecisionRecallKernel<paddle::platform::CPUPlace, double>);
PD_REGISTER_STRUCT_KERNEL(precision_recall,
CPU,
ALL_LAYOUT,
ops::PrecisionRecallKernel,
float,
double) {}
......@@ -26,7 +26,7 @@ using EigenMatrix = framework::EigenMatrix<T, MajorType, IndexType>;
enum StateVariable { TP = 0, FP, TN, FN };
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PrecisionRecallKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -52,7 +52,7 @@ static ncclRedOp_t str_to_nccl_red_type(std::string reduction) {
return it->second;
}
template <typename T>
template <typename T, typename DeviceContext>
class NCCLAllReduceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -87,7 +87,7 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class NCCLReduceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -128,7 +128,7 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class NCCLBcastKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -172,6 +172,9 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(ncclAllReduce, ops::NCCLAllReduceKernel<float>);
REGISTER_OP_CUDA_KERNEL(ncclBcast, ops::NCCLBcastKernel<float>);
REGISTER_OP_CUDA_KERNEL(ncclReduce, ops::NCCLReduceKernel<float>);
PD_REGISTER_STRUCT_KERNEL(
ncclAllReduce, GPU, ALL_LAYOUT, ops::NCCLAllReduceKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
ncclBcast, GPU, ALL_LAYOUT, ops::NCCLBcastKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
ncclReduce, GPU, ALL_LAYOUT, ops::NCCLReduceKernel, float) {}
......@@ -31,9 +31,12 @@ limitations under the License. */
#include "paddle/fluid/platform/place.h"
USE_NO_KERNEL_OP(ncclInit);
USE_CUDA_ONLY_OP(ncclAllReduce);
USE_CUDA_ONLY_OP(ncclReduce);
USE_CUDA_ONLY_OP(ncclBcast);
USE_OP_ITSELF(ncclAllReduce);
USE_OP_ITSELF(ncclReduce);
USE_OP_ITSELF(ncclBcast);
PD_DECLARE_KERNEL(ncclAllReduce, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(ncclReduce, GPU, ALL_LAYOUT);
PD_DECLARE_KERNEL(ncclBcast, GPU, ALL_LAYOUT);
namespace f = paddle::framework;
namespace p = paddle::platform;
......
......@@ -320,9 +320,8 @@ REGISTER_OPERATOR(nce_grad,
ops::NCEOpGrad,
ops::NCEOpGradVarTypeInference,
ops::NCEGradOpNoNeedBufferVarInferer);
REGISTER_OP_CPU_KERNEL(nce,
ops::NCEKernel<paddle::platform::CPUPlace, float>,
ops::NCEKernel<paddle::platform::CPUPlace, double>);
REGISTER_OP_CPU_KERNEL(nce_grad,
ops::NCEGradKernel<paddle::platform::CPUPlace, float>,
ops::NCEGradKernel<paddle::platform::CPUPlace, double>);
PD_REGISTER_STRUCT_KERNEL(nce, CPU, ALL_LAYOUT, ops::NCEKernel, float, double) {
}
PD_REGISTER_STRUCT_KERNEL(
nce_grad, CPU, ALL_LAYOUT, ops::NCEGradKernel, float, double) {}
......@@ -75,7 +75,7 @@ void PrepareSamples(const framework::ExecutionContext &context,
}
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class NCEKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
......@@ -245,7 +245,7 @@ class NCEKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class NCEGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &context) const override {
......
......@@ -45,7 +45,7 @@ establish the dependency between input and output tensors.
}
};
template <typename T>
template <typename T, typename DeviceContext>
class NopKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {}
......@@ -58,8 +58,8 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(nop, ops::NopOp, ops::NopOpMaker);
REGISTER_OP_CPU_KERNEL(nop, ops::NopKernel<float>);
PD_REGISTER_STRUCT_KERNEL(nop, CPU, ALL_LAYOUT, ops::NopKernel, float) {}
REGISTER_OP_CUDA_KERNEL(nop, ops::NopKernel<float>);
REGISTER_OP_NPU_KERNEL(nop, ops::NopKernel<float>);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_STRUCT_KERNEL(nop, GPU, ALL_LAYOUT, ops::NopKernel, float) {}
#endif
......@@ -58,10 +58,9 @@ class NumberCountOpMaker : public framework::OpProtoAndCheckerMaker {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CPU_KERNEL(number_count,
ops::NumberCountOpCPUKernel<int>,
ops::NumberCountOpCPUKernel<int64_t>);
REGISTER_OP_WITHOUT_GRADIENT(number_count,
ops::NumberCountOp,
ops::NumberCountOpMaker);
PD_REGISTER_STRUCT_KERNEL(
number_count, CPU, ALL_LAYOUT, ops::NumberCountOpCPUKernel, int, int64_t) {}
......@@ -79,7 +79,7 @@ __global__ void NumberCount(const T* numbers,
}
}
template <typename T>
template <typename T, typename DeviceContext>
class NumberCountOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -111,4 +111,5 @@ class NumberCountOpCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(number_count, ops::NumberCountOpCUDAKernel<int64_t>);
PD_REGISTER_STRUCT_KERNEL(
number_count, GPU, ALL_LAYOUT, ops::NumberCountOpCUDAKernel, int64_t) {}
......@@ -24,7 +24,7 @@
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class NumberCountOpCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -133,5 +133,5 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(proximal_adagrad,
ops::ProximalAdagradOp,
ops::ProximalAdagradOpMaker);
REGISTER_OP_CPU_KERNEL(proximal_adagrad,
ops::ProximalAdagradOpKernel<phi::CPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
proximal_adagrad, CPU, ALL_LAYOUT, ops::ProximalAdagradOpKernel, float) {}
......@@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */
#include "paddle/fluid/operators/optimizers/proximal_adagrad_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(proximal_adagrad,
ops::ProximalAdagradOpKernel<phi::GPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
proximal_adagrad, GPU, ALL_LAYOUT, ops::ProximalAdagradOpKernel, float) {}
......@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class ProximalAdagradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -106,5 +106,6 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(proximal_gd,
ops::ProximalGDOp,
ops::ProximalGDOpMaker);
REGISTER_OP_CPU_KERNEL(proximal_gd,
ops::ProximalGDOpKernel<phi::CPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
proximal_gd, CPU, ALL_LAYOUT, ops::ProximalGDOpKernel, float) {}
......@@ -13,5 +13,5 @@ specific language governing permissions and limitations under the License. */
#include "paddle/fluid/operators/optimizers/proximal_gd_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(proximal_gd,
ops::ProximalGDOpKernel<phi::GPUContext, float>);
PD_REGISTER_STRUCT_KERNEL(
proximal_gd, GPU, ALL_LAYOUT, ops::ProximalGDOpKernel, float) {}
......@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class ProximalGDOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -402,7 +402,7 @@ static inline void GetPaddings(int* paddings,
}
}
template <typename T>
template <typename T, typename DeviceContext>
class Pad2dCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -520,7 +520,7 @@ class Pad2dCPUKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class Pad2dGradCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -873,11 +873,8 @@ REGISTER_OPERATOR(pad2d,
REGISTER_OPERATOR(pad2d_grad,
ops::Pad2dOpGrad,
ops::Pad2dOpGradNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(pad2d,
ops::Pad2dCPUKernel<float>,
ops::Pad2dCPUKernel<double>,
ops::Pad2dCPUKernel<int>,
ops::Pad2dCPUKernel<int64_t>);
REGISTER_OP_CPU_KERNEL(pad2d_grad,
ops::Pad2dGradCPUKernel<float>,
ops::Pad2dGradCPUKernel<double>);
PD_REGISTER_STRUCT_KERNEL(
pad2d, CPU, ALL_LAYOUT, ops::Pad2dCPUKernel, float, double, int, int64_t) {}
PD_REGISTER_STRUCT_KERNEL(
pad2d_grad, CPU, ALL_LAYOUT, ops::Pad2dGradCPUKernel, float, double) {}
......@@ -361,7 +361,7 @@ static inline void GetPaddings(int* paddings,
}
}
template <typename T>
template <typename T, typename DeviceContext>
class Pad2dCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -489,7 +489,7 @@ class Pad2dCUDAKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class Pad2dGradCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -618,13 +618,19 @@ class Pad2dGradCUDAKernel : public framework::OpKernel<T> {
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(pad2d,
ops::Pad2dCUDAKernel<plat::float16>,
ops::Pad2dCUDAKernel<float>,
ops::Pad2dCUDAKernel<double>,
ops::Pad2dCUDAKernel<int>,
ops::Pad2dCUDAKernel<int64_t>);
REGISTER_OP_CUDA_KERNEL(pad2d_grad,
ops::Pad2dGradCUDAKernel<plat::float16>,
ops::Pad2dGradCUDAKernel<float>,
ops::Pad2dGradCUDAKernel<double>);
PD_REGISTER_STRUCT_KERNEL(pad2d,
GPU,
ALL_LAYOUT,
ops::Pad2dCUDAKernel,
float,
double,
int,
int64_t,
plat::float16) {}
PD_REGISTER_STRUCT_KERNEL(pad2d_grad,
GPU,
ALL_LAYOUT,
ops::Pad2dGradCUDAKernel,
float,
double,
plat::float16) {}
......@@ -243,26 +243,38 @@ REGISTER_OPERATOR(pad_constant_like,
ops::PadConstantLikeOpGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(pad_constant_like_grad, ops::PadConstantLikeOpGrad);
REGISTER_OP_CPU_KERNEL(pad_constant_like,
ops::PadConstantLikeKernel<phi::CPUContext, float>,
ops::PadConstantLikeKernel<phi::CPUContext, double>,
ops::PadConstantLikeKernel<phi::CPUContext, int>,
ops::PadConstantLikeKernel<phi::CPUContext, int64_t>);
REGISTER_OP_CPU_KERNEL(
pad_constant_like_grad,
ops::PadConstantLikeGradKernel<phi::CPUContext, float>,
ops::PadConstantLikeGradKernel<phi::CPUContext, double>,
ops::PadConstantLikeGradKernel<phi::CPUContext, int>,
ops::PadConstantLikeGradKernel<phi::CPUContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(pad_constant_like,
ops::PadConstantLikeKernel<phi::GPUContext, float>,
ops::PadConstantLikeKernel<phi::GPUContext, double>,
ops::PadConstantLikeKernel<phi::GPUContext, int>,
ops::PadConstantLikeKernel<phi::GPUContext, int64_t>);
REGISTER_OP_CUDA_KERNEL(
pad_constant_like_grad,
ops::PadConstantLikeGradKernel<phi::GPUContext, int>,
ops::PadConstantLikeGradKernel<phi::GPUContext, int64_t>,
ops::PadConstantLikeGradKernel<phi::GPUContext, float>,
ops::PadConstantLikeGradKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(pad_constant_like,
CPU,
ALL_LAYOUT,
ops::PadConstantLikeKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_STRUCT_KERNEL(pad_constant_like_grad,
CPU,
ALL_LAYOUT,
ops::PadConstantLikeGradKernel,
float,
double,
int,
int64_t) {}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_STRUCT_KERNEL(pad_constant_like,
GPU,
ALL_LAYOUT,
ops::PadConstantLikeKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_STRUCT_KERNEL(pad_constant_like_grad,
GPU,
ALL_LAYOUT,
ops::PadConstantLikeGradKernel,
float,
double,
int,
int64_t) {}
#endif
......@@ -26,7 +26,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PadConstantLikeKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -61,7 +61,7 @@ class PadConstantLikeKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PadConstantLikeGradKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
......@@ -202,14 +202,19 @@ REGISTER_OPERATOR(partial_concat,
REGISTER_OPERATOR(partial_concat_grad, ops::PartialConcatGradOp);
REGISTER_OP_CPU_KERNEL(partial_concat,
ops::PartialConcatKernel<phi::CPUContext, double>,
ops::PartialConcatKernel<phi::CPUContext, float>,
ops::PartialConcatKernel<phi::CPUContext, int64_t>,
ops::PartialConcatKernel<phi::CPUContext, int>);
REGISTER_OP_CPU_KERNEL(partial_concat_grad,
ops::PartialConcatGradientOpKernel<float>,
ops::PartialConcatGradientOpKernel<int>,
ops::PartialConcatGradientOpKernel<double>,
ops::PartialConcatGradientOpKernel<int64_t>);
PD_REGISTER_STRUCT_KERNEL(partial_concat,
CPU,
ALL_LAYOUT,
ops::PartialConcatKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_STRUCT_KERNEL(partial_concat_grad,
CPU,
ALL_LAYOUT,
ops::PartialConcatGradientOpKernel,
float,
double,
int,
int64_t) {}
......@@ -65,7 +65,7 @@ __global__ void ConcatPartialGradCUDAKernel(T **in,
}
}
template <typename T>
template <typename T, typename DeviceContext>
class PartialConcatOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -146,7 +146,7 @@ class PartialConcatOpCUDAKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PartialConcatGradOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -231,16 +231,22 @@ class PartialConcatGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(partial_concat,
ops::PartialConcatOpCUDAKernel<float>,
ops::PartialConcatOpCUDAKernel<double>,
ops::PartialConcatOpCUDAKernel<int>,
ops::PartialConcatOpCUDAKernel<int64_t>,
ops::PartialConcatOpCUDAKernel<plat::float16>);
REGISTER_OP_CUDA_KERNEL(partial_concat_grad,
ops::PartialConcatGradOpCUDAKernel<float>,
ops::PartialConcatGradOpCUDAKernel<double>,
ops::PartialConcatGradOpCUDAKernel<int>,
ops::PartialConcatGradOpCUDAKernel<int64_t>,
ops::PartialConcatGradOpCUDAKernel<plat::float16>);
PD_REGISTER_STRUCT_KERNEL(partial_concat,
GPU,
ALL_LAYOUT,
ops::PartialConcatOpCUDAKernel,
float,
double,
int,
int64_t,
plat::float16) {}
PD_REGISTER_STRUCT_KERNEL(partial_concat_grad,
GPU,
ALL_LAYOUT,
ops::PartialConcatGradOpCUDAKernel,
float,
double,
int,
int64_t,
plat::float16) {}
......@@ -39,7 +39,7 @@ static inline int64_t ComputeStartIndex(int64_t start_index, int64_t size) {
return start_index;
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PartialConcatKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -84,7 +84,7 @@ class PartialConcatKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PartialConcatGradientOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -204,14 +204,19 @@ REGISTER_OPERATOR(partial_sum,
REGISTER_OPERATOR(partial_sum_grad, ops::PartialSumGradOp);
REGISTER_OP_CPU_KERNEL(partial_sum,
ops::PartialSumKernel<phi::CPUContext, float>,
ops::PartialSumKernel<phi::CPUContext, int>,
ops::PartialSumKernel<phi::CPUContext, double>,
ops::PartialSumKernel<phi::CPUContext, int64_t>);
REGISTER_OP_CPU_KERNEL(partial_sum_grad,
ops::PartialSumGradientOpKernel<float>,
ops::PartialSumGradientOpKernel<int>,
ops::PartialSumGradientOpKernel<double>,
ops::PartialSumGradientOpKernel<int64_t>);
PD_REGISTER_STRUCT_KERNEL(partial_sum,
CPU,
ALL_LAYOUT,
ops::PartialSumKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_STRUCT_KERNEL(partial_sum_grad,
CPU,
ALL_LAYOUT,
ops::PartialSumGradientOpKernel,
float,
double,
int,
int64_t) {}
......@@ -70,7 +70,7 @@ __global__ void PartialSumGradCUDAKernel(T **res_grad,
}
}
template <typename T>
template <typename T, typename DeviceContext>
class PartialSumOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -144,7 +144,7 @@ class PartialSumOpCUDAKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PartialSumGradOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -233,18 +233,3 @@ class PartialSumGradOpCUDAKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(partial_sum,
ops::PartialSumOpCUDAKernel<float>,
ops::PartialSumOpCUDAKernel<double>,
ops::PartialSumOpCUDAKernel<int>,
ops::PartialSumOpCUDAKernel<int64_t>,
ops::PartialSumOpCUDAKernel<plat::float16>);
REGISTER_OP_CUDA_KERNEL(partial_sum_grad,
ops::PartialSumGradOpCUDAKernel<float>,
ops::PartialSumGradOpCUDAKernel<double>,
ops::PartialSumGradOpCUDAKernel<int>,
ops::PartialSumGradOpCUDAKernel<int64_t>,
ops::PartialSumGradOpCUDAKernel<plat::float16>);
......@@ -21,7 +21,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PartialSumKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -57,7 +57,7 @@ class PartialSumKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PartialSumGradientOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -253,7 +253,10 @@ namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(positive_negative_pair,
ops::PositiveNegativePairOp,
ops::PositiveNegativePairOpMaker);
REGISTER_OP_CPU_KERNEL(
positive_negative_pair,
ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, float>,
ops::PositiveNegativePairKernel<paddle::platform::CPUPlace, double>);
PD_REGISTER_STRUCT_KERNEL(positive_negative_pair,
CPU,
ALL_LAYOUT,
ops::PositiveNegativePairKernel,
float,
double) {}
......@@ -19,7 +19,7 @@ limitations under the License. */
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PositiveNegativePairKernel : public framework::OpKernel<T> {
public:
struct PredictionResult {
......
......@@ -195,13 +195,20 @@ REGISTER_OPERATOR(prroi_pool,
ops::PRROIPoolGradMaker<paddle::framework::OpDesc>,
ops::PRROIPoolGradMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(prroi_pool_grad, ops::PRROIPoolGradOp);
REGISTER_OP_CPU_KERNEL(prroi_pool,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, float>,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, double>,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, int>,
ops::CPUPRROIPoolOpKernel<phi::CPUContext, int64_t>);
REGISTER_OP_CPU_KERNEL(prroi_pool_grad,
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, float>,
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, double>,
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, int>,
ops::CPUPRROIPoolGradOpKernel<phi::CPUContext, int64_t>);
PD_REGISTER_STRUCT_KERNEL(prroi_pool,
CPU,
ALL_LAYOUT,
ops::CPUPRROIPoolOpKernel,
float,
double,
int,
int64_t) {}
PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad,
CPU,
ALL_LAYOUT,
ops::CPUPRROIPoolGradOpKernel,
float,
double,
int,
int64_t) {}
......@@ -211,7 +211,7 @@ __global__ void GPUPRROIPoolBackward(const int nthreads,
}
}
template <typename T>
template <typename T, typename DeviceContext>
class GPUPRROIPoolOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -314,7 +314,7 @@ class GPUPRROIPoolOpKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class GPUPRROIPoolGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -428,9 +428,12 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(prroi_pool,
ops::GPUPRROIPoolOpKernel<float>,
ops::GPUPRROIPoolOpKernel<double>);
REGISTER_OP_CUDA_KERNEL(prroi_pool_grad,
ops::GPUPRROIPoolGradOpKernel<phi::GPUContext, float>,
ops::GPUPRROIPoolGradOpKernel<phi::GPUContext, double>);
PD_REGISTER_STRUCT_KERNEL(
prroi_pool, GPU, ALL_LAYOUT, ops::GPUPRROIPoolOpKernel, float, double) {}
PD_REGISTER_STRUCT_KERNEL(prroi_pool_grad,
GPU,
ALL_LAYOUT,
ops::GPUPRROIPoolGradOpKernel,
float,
double) {}
......@@ -327,7 +327,7 @@ inline HOSTDEVICE void PrRoIPoolingCoorBackward(int s_w,
(*this_out_grad));
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class CPUPRROIPoolOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -481,7 +481,7 @@ class CPUPRROIPoolOpKernel : public framework::OpKernel<T> {
}
};
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class CPUPRROIPoolGradOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -126,7 +126,9 @@ REGISTER_OP_WITHOUT_GRADIENT(prune_gate_by_capacity,
ops::PruneGateByCapacityOp,
ops::PruneGateByCapacityOpMaker);
REGISTER_OP_CPU_KERNEL(
prune_gate_by_capacity,
ops::PruneGateByCapacityCPUKernel<phi::CPUContext, int>,
ops::PruneGateByCapacityCPUKernel<phi::CPUContext, int64_t>);
PD_REGISTER_STRUCT_KERNEL(prune_gate_by_capacity,
CPU,
ALL_LAYOUT,
ops::PruneGateByCapacityCPUKernel,
int,
int64_t) {}
......@@ -105,7 +105,7 @@ static void VisitDataType(phi::DataType type, Visitor visitor) {
}
}
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PruneGateByCapacityCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......@@ -127,6 +127,8 @@ class PruneGateByCapacityCUDAKernel : public framework::OpKernel<T> {
} // namespace operators
} // namespace paddle
REGISTER_OP_CUDA_KERNEL(
prune_gate_by_capacity,
ops::PruneGateByCapacityCUDAKernel<phi::GPUContext, int64_t>);
PD_REGISTER_STRUCT_KERNEL(prune_gate_by_capacity,
GPU,
ALL_LAYOUT,
ops::PruneGateByCapacityCUDAKernel,
int64_t) {}
......@@ -20,7 +20,7 @@
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
template <typename T, typename DeviceContext>
class PruneGateByCapacityCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
......
......@@ -151,10 +151,15 @@ REGISTER_OPERATOR(
REGISTER_OPERATOR(push_box_extended_sparse, ops::PushBoxExtendedSparseOp);
REGISTER_OP_CPU_KERNEL(pull_box_extended_sparse,
ops::PullBoxExtendedSparseCPUKernel<float>,
ops::PullBoxExtendedSparseCPUKernel<double>);
REGISTER_OP_CPU_KERNEL(push_box_extended_sparse,
ops::PushBoxExtendedSparseCPUKernel<float>,
ops::PushBoxExtendedSparseCPUKernel<double>);
PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse,
CPU,
ALL_LAYOUT,
ops::PullBoxExtendedSparseCPUKernel,
float,
double) {}
PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse,
CPU,
ALL_LAYOUT,
ops::PushBoxExtendedSparseCPUKernel,
float,
double) {}
......@@ -19,7 +19,7 @@
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -27,7 +27,7 @@ class PullBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -38,9 +38,16 @@ class PushBoxExtendedSparseCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(pull_box_extended_sparse,
ops::PullBoxExtendedSparseCUDAKernel<float>,
ops::PullBoxExtendedSparseCUDAKernel<double>);
REGISTER_OP_CUDA_KERNEL(push_box_extended_sparse,
ops::PushBoxExtendedSparseCUDAKernel<float>,
ops::PushBoxExtendedSparseCUDAKernel<double>);
PD_REGISTER_STRUCT_KERNEL(pull_box_extended_sparse,
GPU,
ALL_LAYOUT,
ops::PullBoxExtendedSparseCUDAKernel,
float,
double) {}
PD_REGISTER_STRUCT_KERNEL(push_box_extended_sparse,
GPU,
ALL_LAYOUT,
ops::PushBoxExtendedSparseCUDAKernel,
float,
double) {}
......@@ -108,7 +108,7 @@ static void PushBoxExtendedSparseFunctor(
#endif
}
template <typename T>
template <typename T, typename DeviceContext>
class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -116,7 +116,7 @@ class PullBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PushBoxExtendedSparseCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
......@@ -135,5 +135,8 @@ REGISTER_OPERATOR(pull_box_sparse,
ops::PushBoxSparseOpMaker<paddle::framework::OpDesc>,
ops::PushBoxSparseOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_box_sparse, ops::PushBoxSparseOp);
REGISTER_OP_CPU_KERNEL(pull_box_sparse, ops::PullBoxSparseKernel<float>);
REGISTER_OP_CPU_KERNEL(push_box_sparse, ops::PushBoxSparseKernel<float>);
PD_REGISTER_STRUCT_KERNEL(
pull_box_sparse, CPU, ALL_LAYOUT, ops::PullBoxSparseKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
push_box_sparse, CPU, ALL_LAYOUT, ops::PushBoxSparseKernel, float) {}
......@@ -113,7 +113,7 @@ static void PushBoxSparseFunctor(const framework::ExecutionContext &ctx) {
#endif
}
template <typename T>
template <typename T, typename DeviceContext>
class PullBoxSparseKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -121,7 +121,7 @@ class PullBoxSparseKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PushBoxSparseKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
......@@ -45,16 +45,7 @@ limitations under the License. */
namespace ops = paddle::operators;
namespace plat = paddle::platform;
#ifdef PADDLE_WITH_XPU_KP
REGISTER_OP_KERNEL(pull_box_sparse,
KP,
plat::XPUPlace,
ops::PullBoxSparseKernel<float>);
REGISTER_OP_KERNEL(push_box_sparse,
KP,
plat::XPUPlace,
ops::PushBoxSparseKernel<float>);
#else
REGISTER_OP_CUDA_KERNEL(pull_box_sparse, ops::PullBoxSparseKernel<float>);
REGISTER_OP_CUDA_KERNEL(push_box_sparse, ops::PushBoxSparseKernel<float>);
#endif
PD_REGISTER_STRUCT_KERNEL(
pull_box_sparse, KPS, ALL_LAYOUT, ops::PullBoxSparseKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
push_box_sparse, KPS, ALL_LAYOUT, ops::PushBoxSparseKernel, float) {}
......@@ -145,9 +145,16 @@ REGISTER_OPERATOR(pull_gpups_sparse,
ops::PushGpuPSSparseOpMaker<paddle::framework::OpDesc>,
ops::PushGpuPSSparseOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_gpups_sparse, ops::PushGpuPSSparseOp);
REGISTER_OP_CPU_KERNEL(pull_gpups_sparse,
ops::PullGpuPSSparseCPUKernel<float>,
ops::PullGpuPSSparseCPUKernel<double>)
REGISTER_OP_CPU_KERNEL(push_gpups_sparse,
ops::PushGpuPSSparseCPUKernel<float>,
ops::PushGpuPSSparseCPUKernel<double>)
PD_REGISTER_STRUCT_KERNEL(pull_gpups_sparse,
CPU,
ALL_LAYOUT,
ops::PullGpuPSSparseCPUKernel,
float,
double) {}
PD_REGISTER_STRUCT_KERNEL(push_gpups_sparse,
CPU,
ALL_LAYOUT,
ops::PushGpuPSSparseCPUKernel,
float,
double) {}
......@@ -20,7 +20,7 @@ namespace paddle {
namespace operators {
using phi::PADDLE_CUDA_NUM_THREADS;
template <typename T>
template <typename T, typename DeviceContext>
class PullGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -28,7 +28,7 @@ class PullGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PushGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -39,9 +39,15 @@ class PushGpuPSSparseCUDAKernel : public framework::OpKernel<T> {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(pull_gpups_sparse,
ops::PullGpuPSSparseCUDAKernel<float>,
ops::PullGpuPSSparseCUDAKernel<double>)
REGISTER_OP_CUDA_KERNEL(push_gpups_sparse,
ops::PushGpuPSSparseCUDAKernel<float>,
ops::PushGpuPSSparseCUDAKernel<double>)
PD_REGISTER_STRUCT_KERNEL(pull_gpups_sparse,
GPU,
ALL_LAYOUT,
ops::PullGpuPSSparseCUDAKernel,
float,
double) {}
PD_REGISTER_STRUCT_KERNEL(push_gpups_sparse,
GPU,
ALL_LAYOUT,
ops::PushGpuPSSparseCUDAKernel,
float,
double) {}
......@@ -97,7 +97,7 @@ static void PushGpuPSSparseFunctor(const framework::ExecutionContext &ctx) {
#endif
}
template <typename T>
template <typename T, typename DeviceContext>
class PullGpuPSSparseCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......@@ -105,7 +105,7 @@ class PullGpuPSSparseCPUKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PushGpuPSSparseCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
......
......@@ -143,5 +143,7 @@ REGISTER_OPERATOR(pull_sparse,
ops::PushSparseOpMaker<paddle::framework::OpDesc>,
ops::PushSparseOpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_sparse, ops::PushSparseOp);
REGISTER_OP_CPU_KERNEL(pull_sparse, ops::PullSparseCPUKernel<float>)
REGISTER_OP_CPU_KERNEL(push_sparse, ops::PushSparseCPUKernel<float>)
PD_REGISTER_STRUCT_KERNEL(
pull_sparse, CPU, ALL_LAYOUT, ops::PullSparseCPUKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
push_sparse, CPU, ALL_LAYOUT, ops::PushSparseCPUKernel, float) {}
......@@ -66,7 +66,7 @@ void PushSparseFunctor(const framework::ExecutionContext& ctx) {
&grads);
}
template <typename T>
template <typename T, typename DeviceContext>
class PullSparseCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -74,7 +74,7 @@ class PullSparseCPUKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PushSparseCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -135,5 +135,7 @@ REGISTER_OPERATOR(pull_sparse_v2,
ops::PushSparseV2OpMaker<paddle::framework::OpDesc>,
ops::PushSparseV2OpMaker<paddle::imperative::OpBase>);
REGISTER_OPERATOR(push_sparse_v2, ops::PushSparseV2Op);
REGISTER_OP_CPU_KERNEL(pull_sparse_v2, ops::PullSparseV2CPUKernel<float>)
REGISTER_OP_CPU_KERNEL(push_sparse_v2, ops::PushSparseV2CPUKernel<float>)
PD_REGISTER_STRUCT_KERNEL(
pull_sparse_v2, CPU, ALL_LAYOUT, ops::PullSparseV2CPUKernel, float) {}
PD_REGISTER_STRUCT_KERNEL(
push_sparse_v2, CPU, ALL_LAYOUT, ops::PushSparseV2CPUKernel, float) {}
......@@ -25,7 +25,7 @@
namespace paddle {
namespace operators {
template <typename T>
template <typename T, typename DeviceContext>
class PullSparseV2CPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......@@ -33,7 +33,7 @@ class PullSparseV2CPUKernel : public framework::OpKernel<T> {
}
};
template <typename T>
template <typename T, typename DeviceContext>
class PushSparseV2CPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
......
......@@ -202,7 +202,6 @@ register_unity_group(
pad_op.cc)
register_unity_group(
cc
modified_huber_loss_op.cc
partial_sum_op.cc
pixel_shuffle_op.cc
pool_op.cc
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册