update (#52879)

b1bb7484 · huangjiyi · GitHub · e93e8a3f · b1bb7484 · b1bb7484
18 changed files
--- a/paddle/fluid/operators/gru_op.cc
+++ b/paddle/fluid/operators/gru_op.cc
@@ -313,11 +313,10 @@ class GRUGradOp : public framework::OperatorWithKernel {
  }
 };
-template <typename T>
+template <typename T, typename DeviceContext>
 class GRUCPUKernel : public framework::OpKernel<T> {
 public:
  void BatchCompute(const framework::ExecutionContext& context) const {
-    using DeviceContext = phi::CPUContext;
    using LodTensorPtr = phi::DenseTensor*;
    bool is_test = context.Attr<bool>("is_test");
@@ -585,9 +584,8 @@ REGISTER_OPERATOR(gru,
 REGISTER_OPERATOR(gru_grad,
                  ops::GRUGradOp,
                  ops::GRUGradOpNoNeedBufferVarInferer);
-REGISTER_OP_CPU_KERNEL(gru,
-                       ops::GRUCPUKernel<float>,
+PD_REGISTER_STRUCT_KERNEL(
-                       ops::GRUCPUKernel<double>);
+    gru, CPU, ALL_LAYOUT, ops::GRUCPUKernel, float, double) {}
-REGISTER_OP_CPU_KERNEL(gru_grad,
+PD_REGISTER_STRUCT_KERNEL(
-                       ops::GRUGradKernel<phi::CPUContext, float>,
+    gru_grad, CPU, ALL_LAYOUT, ops::GRUGradKernel, float, double) {}
-                       ops::GRUGradKernel<phi::CPUContext, double>);
--- a/paddle/fluid/operators/gru_op.cu.cc
+++ b/paddle/fluid/operators/gru_op.cu.cc
@@ -17,7 +17,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GRUKernel : public framework::OpKernel<T> {
 public:
  void BatchCompute(const framework::ExecutionContext& context) const {
@@ -133,9 +133,8 @@ class GRUKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(gru,
-                        ops::GRUKernel<phi::GPUContext, float>,
+PD_REGISTER_STRUCT_KERNEL(gru, GPU, ALL_LAYOUT, ops::GRUKernel, float, double) {
-                        ops::GRUKernel<phi::GPUContext, double>);
+}
-REGISTER_OP_CUDA_KERNEL(gru_grad,
+PD_REGISTER_STRUCT_KERNEL(
-                        ops::GRUGradKernel<phi::GPUContext, float>,
+    gru_grad, GPU, ALL_LAYOUT, ops::GRUGradKernel, float, double) {}
-                        ops::GRUGradKernel<phi::GPUContext, double>);
--- a/paddle/fluid/operators/gru_op.h
+++ b/paddle/fluid/operators/gru_op.h
@@ -36,7 +36,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
  row_shuffle(ctx, src, index_lod, dst, indexed_src);
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GRUGradKernel : public framework::OpKernel<T> {
 public:
  void BatchCompute(const framework::ExecutionContext& context) const {

--- a/paddle/fluid/operators/gru_unit_op.cc
+++ b/paddle/fluid/operators/gru_unit_op.cc
@@ -323,9 +323,7 @@ REGISTER_OPERATOR(gru_unit_grad,
                  ops::GRUUnitGradOp,
                  ops::GRUUnitGradOpNoNeedBufferVarInferer);
-REGISTER_OP_CPU_KERNEL(gru_unit,
+PD_REGISTER_STRUCT_KERNEL(
-                       ops::GRUUnitKernel<phi::CPUContext, float>,
+    gru_unit, CPU, ALL_LAYOUT, ops::GRUUnitKernel, float, double) {}
-                       ops::GRUUnitKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(
-REGISTER_OP_CPU_KERNEL(gru_unit_grad,
+    gru_unit_grad, CPU, ALL_LAYOUT, ops::GRUUnitGradKernel, float, double) {}
-                       ops::GRUUnitGradKernel<phi::CPUContext, float>,
-                       ops::GRUUnitGradKernel<phi::CPUContext, double>);
--- a/paddle/fluid/operators/gru_unit_op.cu
+++ b/paddle/fluid/operators/gru_unit_op.cu
@@ -14,9 +14,8 @@ limitations under the License. */
 #include "paddle/fluid/operators/gru_unit_op.h"
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(gru_unit,
-                        ops::GRUUnitKernel<phi::GPUContext, float>,
+PD_REGISTER_STRUCT_KERNEL(
-                        ops::GRUUnitKernel<phi::GPUContext, double>);
+    gru_unit, GPU, ALL_LAYOUT, ops::GRUUnitKernel, float, double) {}
-REGISTER_OP_CUDA_KERNEL(gru_unit_grad,
+PD_REGISTER_STRUCT_KERNEL(
-                        ops::GRUUnitGradKernel<phi::GPUContext, float>,
+    gru_unit_grad, GPU, ALL_LAYOUT, ops::GRUUnitGradKernel, float, double) {}
-                        ops::GRUUnitGradKernel<phi::GPUContext, double>);
--- a/paddle/fluid/operators/gru_unit_op.h
+++ b/paddle/fluid/operators/gru_unit_op.h
@@ -25,7 +25,7 @@ namespace operators {
 enum GRUActivationType { identity = 0, sigmoid = 1, tanh = 2, relu = 3 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GRUUnitKernel : public framework::OpKernel<T> {
 public:
  template <typename Device, typename X, typename Y>
@@ -153,7 +153,7 @@ class GRUUnitKernel : public framework::OpKernel<T> {
  }
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class GRUUnitGradKernel : public framework::OpKernel<T> {
 public:
  template <typename Device, typename X, typename Y, typename DX, typename DY>

--- a/paddle/fluid/operators/lrn_op.cc
+++ b/paddle/fluid/operators/lrn_op.cc
@@ -400,5 +400,7 @@ REGISTER_OPERATOR(lrn,
                  ops::LRNGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(lrn_grad, ops::LRNOpGrad);
-REGISTER_OP_CPU_KERNEL(lrn, ops::LRNKernel<phi::CPUContext, float>);
-REGISTER_OP_CPU_KERNEL(lrn_grad, ops::LRNGradKernel<phi::CPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(lrn, CPU, ALL_LAYOUT, ops::LRNKernel, float) {}
+PD_REGISTER_STRUCT_KERNEL(
+    lrn_grad, CPU, ALL_LAYOUT, ops::LRNGradKernel, float) {}
--- a/paddle/fluid/operators/lrn_op.cu
+++ b/paddle/fluid/operators/lrn_op.cu
@@ -274,5 +274,6 @@ template struct LRNGradFunctor<phi::GPUContext, double>;
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(lrn, ops::LRNKernel<phi::GPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(lrn, GPU, ALL_LAYOUT, ops::LRNKernel, float) {}
-REGISTER_OP_CUDA_KERNEL(lrn_grad, ops::LRNGradKernel<phi::GPUContext, float>);
+PD_REGISTER_STRUCT_KERNEL(
+    lrn_grad, GPU, ALL_LAYOUT, ops::LRNGradKernel, float) {}
--- a/paddle/fluid/operators/lrn_op.h
+++ b/paddle/fluid/operators/lrn_op.h
@@ -43,7 +43,7 @@ struct LRNFunctor {
                  const DataLayout data_layout = DataLayout::kAnyLayout);
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LRNKernel : public framework::OpKernel<T> {
 public:
  // f(x) = x * ( k + alpha * SUM((x)^2) )^(-beta)
@@ -136,7 +136,7 @@ struct LRNGradFunctor {
 * The upper and lower is the same as forward. The logic of the sum
 * is also the same as forward.
 */
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LRNGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {

--- a/paddle/fluid/operators/lstm_op.cc
+++ b/paddle/fluid/operators/lstm_op.cc
@@ -358,9 +358,8 @@ REGISTER_OPERATOR(lstm,
                  ops::LSTMGradOpMaker<paddle::framework::OpDesc>,
                  ops::LSTMGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(lstm_grad, ops::LSTMGradOp);
-REGISTER_OP_CPU_KERNEL(lstm,
-                       ops::LSTMKernel<phi::CPUContext, float>,
+PD_REGISTER_STRUCT_KERNEL(
-                       ops::LSTMKernel<phi::CPUContext, double>);
+    lstm, CPU, ALL_LAYOUT, ops::LSTMKernel, float, double) {}
-REGISTER_OP_CPU_KERNEL(lstm_grad,
+PD_REGISTER_STRUCT_KERNEL(
-                       ops::LSTMGradKernel<phi::CPUContext, float>,
+    lstm_grad, CPU, ALL_LAYOUT, ops::LSTMGradKernel, float, double) {}
-                       ops::LSTMGradKernel<phi::CPUContext, double>);
--- a/paddle/fluid/operators/lstm_op.cu.cc
+++ b/paddle/fluid/operators/lstm_op.cu.cc
@@ -15,9 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/lstm_op.h"
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(lstm,
+PD_REGISTER_STRUCT_KERNEL(
-                        ops::LSTMKernel<phi::GPUContext, float>,
+    lstm, GPU, ALL_LAYOUT, ops::LSTMKernel, float, double) {}
-                        ops::LSTMKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(
-REGISTER_OP_CUDA_KERNEL(lstm_grad,
+    lstm_grad, GPU, ALL_LAYOUT, ops::LSTMGradKernel, float, double) {}
-                        ops::LSTMGradKernel<phi::GPUContext, float>,
-                        ops::LSTMGradKernel<phi::GPUContext, double>);
--- a/paddle/fluid/operators/lstm_op.h
+++ b/paddle/fluid/operators/lstm_op.h
@@ -35,7 +35,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
  row_shuffle(ctx, src, index_lod, dst, indexed_src);
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LSTMKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
@@ -197,7 +197,7 @@ class LSTMKernel : public framework::OpKernel<T> {
  }
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LSTMGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {

--- a/paddle/fluid/operators/lstm_unit_op.cc
+++ b/paddle/fluid/operators/lstm_unit_op.cc
@@ -142,10 +142,8 @@ REGISTER_OPERATOR(lstm_unit,
                  ops::LstmUnitGradOpMaker<paddle::framework::OpDesc>,
                  ops::LstmUnitGradOpMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(lstm_unit_grad, ops::LstmUnitGradOp);
-REGISTER_OP_CPU_KERNEL(lstm_unit,
-                       ops::LstmUnitKernel<paddle::platform::CPUPlace, float>,
+PD_REGISTER_STRUCT_KERNEL(
-                       ops::LstmUnitKernel<paddle::platform::CPUPlace, double>);
+    lstm_unit, CPU, ALL_LAYOUT, ops::LstmUnitKernel, float, double) {}
-REGISTER_OP_CPU_KERNEL(
+PD_REGISTER_STRUCT_KERNEL(
-    lstm_unit_grad,
+    lstm_unit_grad, CPU, ALL_LAYOUT, ops::LstmUnitGradKernel, float, double) {}
-    ops::LstmUnitGradKernel<paddle::platform::CPUPlace, float>,
-    ops::LstmUnitGradKernel<paddle::platform::CPUPlace, double>);
--- a/paddle/fluid/operators/lstm_unit_op.cu
+++ b/paddle/fluid/operators/lstm_unit_op.cu
@@ -98,7 +98,7 @@ __global__ void LSTMUnitGradientKernel(const int nthreads,
  }
 }
-template <typename T>
+template <typename T, typename DeviceContext>
 class LstmUnitOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
@@ -131,7 +131,7 @@ class LstmUnitOpCUDAKernel : public framework::OpKernel<T> {
  }
 };
-template <typename T>
+template <typename T, typename DeviceContext>
 class LstmUnitGradOpCUDAKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
@@ -183,9 +183,11 @@ class LstmUnitGradOpCUDAKernel : public framework::OpKernel<T> {
 }  // namespace paddle
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(lstm_unit,
+PD_REGISTER_STRUCT_KERNEL(
-                        ops::LstmUnitOpCUDAKernel<float>,
+    lstm_unit, GPU, ALL_LAYOUT, ops::LstmUnitOpCUDAKernel, float, double) {}
-                        ops::LstmUnitOpCUDAKernel<double>);
+PD_REGISTER_STRUCT_KERNEL(lstm_unit_grad,
-REGISTER_OP_CUDA_KERNEL(lstm_unit_grad,
+                          GPU,
-                        ops::LstmUnitGradOpCUDAKernel<float>,
+                          ALL_LAYOUT,
-                        ops::LstmUnitGradOpCUDAKernel<double>);
+                          ops::LstmUnitGradOpCUDAKernel,
+                          float,
+                          double) {}
--- a/paddle/fluid/operators/lstm_unit_op.h
+++ b/paddle/fluid/operators/lstm_unit_op.h
@@ -33,7 +33,7 @@ inline T tanh(T x) {
  return 2. * sigmoid(2. * x) - 1.;
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LstmUnitKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
@@ -78,7 +78,7 @@ class LstmUnitKernel : public framework::OpKernel<T> {
  }
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LstmUnitGradKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {

--- a/paddle/fluid/operators/lstmp_op.cc
+++ b/paddle/fluid/operators/lstmp_op.cc
@@ -405,9 +405,7 @@ REGISTER_OPERATOR(lstmp,
                  ops::LSTMPGradMaker<paddle::framework::OpDesc>,
                  ops::LSTMPGradMaker<paddle::imperative::OpBase>);
 REGISTER_OPERATOR(lstmp_grad, ops::LSTMPGradOp);
-REGISTER_OP_CPU_KERNEL(lstmp,
+PD_REGISTER_STRUCT_KERNEL(
-                       ops::LSTMPKernel<phi::CPUContext, float>,
+    lstmp, CPU, ALL_LAYOUT, ops::LSTMPKernel, float, double) {}
-                       ops::LSTMPKernel<phi::CPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(
-REGISTER_OP_CPU_KERNEL(lstmp_grad,
+    lstmp_grad, CPU, ALL_LAYOUT, ops::LSTMPGradKernel, float, double) {}
-                       ops::LSTMPGradKernel<phi::CPUContext, float>,
-                       ops::LSTMPGradKernel<phi::CPUContext, double>);
--- a/paddle/fluid/operators/lstmp_op.cu
+++ b/paddle/fluid/operators/lstmp_op.cu
@@ -15,9 +15,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/lstmp_op.h"
 namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(lstmp,
+PD_REGISTER_STRUCT_KERNEL(
-                        ops::LSTMPKernel<phi::GPUContext, float>,
+    lstmp, GPU, ALL_LAYOUT, ops::LSTMPKernel, float, double) {}
-                        ops::LSTMPKernel<phi::GPUContext, double>);
+PD_REGISTER_STRUCT_KERNEL(
-REGISTER_OP_CUDA_KERNEL(lstmp_grad,
+    lstmp_grad, GPU, ALL_LAYOUT, ops::LSTMPGradKernel, float, double) {}
-                        ops::LSTMPGradKernel<phi::GPUContext, float>,
-                        ops::LSTMPGradKernel<phi::GPUContext, double>);
--- a/paddle/fluid/operators/lstmp_op.h
+++ b/paddle/fluid/operators/lstmp_op.h
@@ -78,7 +78,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
  row_shuffle(ctx, src, index, dst, indexed_src);
 }
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LSTMPKernel : public framework::OpKernel<T> {
 public:
  template <typename Device, typename X, typename Y>
@@ -279,7 +279,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
  }
 };
-template <typename DeviceContext, typename T>
+template <typename T, typename DeviceContext>
 class LSTMPGradKernel : public framework::OpKernel<T> {
 public:
  template <typename Device, typename X, typename Y, typename DX, typename DY>