From 78ecb6683a94bfb7dba9039e3a18ad7c24760aaf Mon Sep 17 00:00:00 2001
From: wuhuanzhou <mr.avin0323@gmail.com>
Date: Wed, 26 May 2021 17:35:55 +0800
Subject: [PATCH] optimize OP's compilation time (#32617)

* optimize OP's compilation time, test=develop

* add more op and run ci test, test=develop

* register CUDA kernels in .cc files, test=develop

* fix macros, test=develop

* fix undefined symbol error, test=develop

* fix compilation error and undefined symbol, test=develop

* fix compilation error on Windows, test=develop

* fix compilation error on Windows, test=develop
---
 paddle/fluid/framework/ir/CMakeLists.txt      |   2 +-
 .../ir/memory_optimize_pass/CMakeLists.txt    |   2 +-
 paddle/fluid/framework/op_registry.h          |   4 +
 paddle/fluid/operators/CMakeLists.txt         |   4 +-
 .../fluid/operators/benchmark/CMakeLists.txt  |   2 +-
 paddle/fluid/operators/conv_cudnn_helper.h    |   8 +-
 paddle/fluid/operators/conv_transpose_op.h    |   8 +-
 paddle/fluid/operators/crop_op.cc             |   7 +
 paddle/fluid/operators/crop_op.cu             |  22 --
 paddle/fluid/operators/crop_op.h              |  17 +-
 paddle/fluid/operators/crop_tensor_op.cc      |  13 ++
 paddle/fluid/operators/crop_tensor_op.cu      |  28 ---
 paddle/fluid/operators/crop_tensor_op.h       |  17 +-
 paddle/fluid/operators/eigen/CMakeLists.txt   |  15 +-
 paddle/fluid/operators/eigen/constant.cc      |  31 +++
 paddle/fluid/operators/eigen/constant.cu      |  31 +++
 paddle/fluid/operators/eigen/eigen_function.h | 188 ++++++++++++++++++
 paddle/fluid/operators/eigen/elementwise.cc   |  51 +++++
 paddle/fluid/operators/eigen/elementwise.cu   |  51 +++++
 paddle/fluid/operators/eigen/erf.cc           |  55 +++++
 paddle/fluid/operators/eigen/erf.cu           |  57 ++++++
 paddle/fluid/operators/eigen/l1_norm.cc       |  48 +++++
 paddle/fluid/operators/eigen/l1_norm.cu       |  47 +++++
 paddle/fluid/operators/eigen/loss.cc          |  90 +++++++++
 paddle/fluid/operators/eigen/loss.cu          |  90 +++++++++
 paddle/fluid/operators/eigen/pad.cc           |  64 ++++++
 paddle/fluid/operators/eigen/pad.cu           |  66 ++++++
 paddle/fluid/operators/eigen/reverse.cc       |  48 +++++
 paddle/fluid/operators/eigen/reverse.cu       |  48 +++++
 paddle/fluid/operators/eigen/scale.cc         |  47 +++++
 paddle/fluid/operators/eigen/scale.cu         |  46 +++++
 paddle/fluid/operators/eigen/sign.cc          |  35 ++++
 paddle/fluid/operators/eigen/sign.cu          |  37 ++++
 paddle/fluid/operators/eigen/slice.cc         |  79 ++++++++
 paddle/fluid/operators/eigen/slice.cu         |  66 ++++++
 paddle/fluid/operators/erf_op.cc              |  11 +
 paddle/fluid/operators/erf_op.cu              |  28 ---
 paddle/fluid/operators/erf_op.h               |   8 +-
 paddle/fluid/operators/hinge_loss_op.cc       |   7 +
 paddle/fluid/operators/hinge_loss_op.cu       |  22 --
 paddle/fluid/operators/hinge_loss_op.h        |  11 +-
 paddle/fluid/operators/im2sequence_op.cc      |   7 +
 paddle/fluid/operators/im2sequence_op.cu      |  23 ---
 paddle/fluid/operators/im2sequence_op.h       |   3 +-
 paddle/fluid/operators/increment_op.cc        |   6 +
 paddle/fluid/operators/increment_op.cu        |  22 --
 paddle/fluid/operators/increment_op.h         |   6 +-
 paddle/fluid/operators/l1_norm_op.cc          |   6 +
 paddle/fluid/operators/l1_norm_op.cu          |  21 --
 paddle/fluid/operators/l1_norm_op.h           |   8 +-
 paddle/fluid/operators/math/padding.h         |  11 +-
 paddle/fluid/operators/minus_op.cc            |   3 +
 paddle/fluid/operators/minus_op.cu            |  19 --
 paddle/fluid/operators/minus_op.h             |   8 +-
 .../fluid/operators/pad_constant_like_op.cc   |  15 ++
 .../fluid/operators/pad_constant_like_op.cu   |  30 ---
 paddle/fluid/operators/pad_op.cc              |  13 ++
 paddle/fluid/operators/pad_op.cu              |  27 ---
 paddle/fluid/operators/rank_loss_op.cc        |   7 +
 paddle/fluid/operators/rank_loss_op.cu        |  22 --
 paddle/fluid/operators/rank_loss_op.h         |  13 +-
 paddle/fluid/operators/reverse_op.cc          |  10 +-
 paddle/fluid/operators/reverse_op.cu          |  24 ---
 paddle/fluid/operators/reverse_op.h           |   8 +-
 paddle/fluid/operators/scale_op.cc            |  16 ++
 paddle/fluid/operators/scale_op.cu            |  32 ---
 paddle/fluid/operators/scale_op.h             |   8 +-
 paddle/fluid/operators/sign_op.cc             |   8 +
 paddle/fluid/operators/sign_op.cu             |  23 ---
 paddle/fluid/operators/sign_op.h              |   4 +-
 paddle/fluid/operators/slice_op.cc            |  25 +++
 paddle/fluid/operators/slice_op.cu            |  39 ----
 paddle/fluid/operators/slice_op.h             |  21 +-
 paddle/fluid/pybind/tensor_py.h               |   8 +-
 74 files changed, 1542 insertions(+), 465 deletions(-)
 delete mode 100644 paddle/fluid/operators/crop_op.cu
 delete mode 100644 paddle/fluid/operators/crop_tensor_op.cu
 create mode 100644 paddle/fluid/operators/eigen/constant.cc
 create mode 100644 paddle/fluid/operators/eigen/constant.cu
 create mode 100644 paddle/fluid/operators/eigen/elementwise.cc
 create mode 100644 paddle/fluid/operators/eigen/elementwise.cu
 create mode 100644 paddle/fluid/operators/eigen/erf.cc
 create mode 100644 paddle/fluid/operators/eigen/erf.cu
 create mode 100644 paddle/fluid/operators/eigen/l1_norm.cc
 create mode 100644 paddle/fluid/operators/eigen/l1_norm.cu
 create mode 100644 paddle/fluid/operators/eigen/loss.cc
 create mode 100644 paddle/fluid/operators/eigen/loss.cu
 create mode 100644 paddle/fluid/operators/eigen/pad.cc
 create mode 100644 paddle/fluid/operators/eigen/pad.cu
 create mode 100644 paddle/fluid/operators/eigen/reverse.cc
 create mode 100644 paddle/fluid/operators/eigen/reverse.cu
 create mode 100644 paddle/fluid/operators/eigen/scale.cc
 create mode 100644 paddle/fluid/operators/eigen/scale.cu
 create mode 100644 paddle/fluid/operators/eigen/sign.cc
 create mode 100644 paddle/fluid/operators/eigen/sign.cu
 create mode 100644 paddle/fluid/operators/eigen/slice.cc
 create mode 100644 paddle/fluid/operators/eigen/slice.cu
 delete mode 100644 paddle/fluid/operators/erf_op.cu
 delete mode 100644 paddle/fluid/operators/hinge_loss_op.cu
 delete mode 100644 paddle/fluid/operators/im2sequence_op.cu
 delete mode 100644 paddle/fluid/operators/increment_op.cu
 delete mode 100644 paddle/fluid/operators/l1_norm_op.cu
 delete mode 100644 paddle/fluid/operators/minus_op.cu
 delete mode 100644 paddle/fluid/operators/pad_constant_like_op.cu
 delete mode 100644 paddle/fluid/operators/pad_op.cu
 delete mode 100644 paddle/fluid/operators/rank_loss_op.cu
 delete mode 100644 paddle/fluid/operators/reverse_op.cu
 delete mode 100644 paddle/fluid/operators/scale_op.cu
 delete mode 100644 paddle/fluid/operators/sign_op.cu
 delete mode 100644 paddle/fluid/operators/slice_op.cu

diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt
index 7e7f1fed5a..fb478bb6e8 100644
--- a/paddle/fluid/framework/ir/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/CMakeLists.txt
@@ -171,7 +171,7 @@ if (WITH_MKLDNN)
     cc_test(test_conv_elementwise_add_mkldnn_fuse_pass SRCS mkldnn/conv_elementwise_add_mkldnn_fuse_pass_tester.cc DEPS conv_elementwise_add_mkldnn_fuse_pass pass_test_util)
     cc_test(test_fc_act_mkldnn_fuse_pass SRCS mkldnn/fc_act_mkldnn_fuse_pass_tester.cc DEPS fc_act_mkldnn_fuse_pass pass_test_util)
     cc_test(test_batch_norm_act_fuse_pass SRCS mkldnn/batch_norm_act_fuse_pass_tester.cc DEPS batch_norm_act_fuse_pass pass_test_util)
-    set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass conv_op conv_transpose_op math_function im2col vol2col batch_norm_op gelu_op activation_op elementwise_add_op concat_and_split naive_executor device_context)
+    set(TEST_CONV_BN_PASS_DEPS conv_bn_fuse_pass graph_to_program_pass conv_op conv_transpose_op math_function im2col vol2col batch_norm_op gelu_op activation_op elementwise_add_op concat_and_split naive_executor device_context eigen_function)
 if (WITH_GPU OR WITH_ROCM)
     set(TEST_CONV_BN_PASS_DEPS ${TEST_CONV_BN_PASS_DEPS} depthwise_conv)
 endif()
diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
index a8c0973cac..5434678ccb 100644
--- a/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
+++ b/paddle/fluid/framework/ir/memory_optimize_pass/CMakeLists.txt
@@ -15,4 +15,4 @@ cc_library(buffer_shared_cross_op_memory_reuse_pass SRCS buffer_shared_cross_op_
 
 cc_library(inplace_addto_op_pass SRCS inplace_addto_op_pass.cc DEPS memory_reuse_pass)
 
-cc_test(test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc DEPS parallel_executor elementwise_mul_op elementwise_add_op scale_op)
+cc_test(test_reference_count_pass_last_lived_ops SRCS test_reference_count_pass_last_lived_ops.cc DEPS parallel_executor elementwise_mul_op elementwise_add_op scale_op eigen_function)
diff --git a/paddle/fluid/framework/op_registry.h b/paddle/fluid/framework/op_registry.h
index 593d4d839f..348ca5b952 100644
--- a/paddle/fluid/framework/op_registry.h
+++ b/paddle/fluid/framework/op_registry.h
@@ -317,8 +317,12 @@ struct OpKernelRegistrarFunctorEx<PlaceType, false, I,
       ::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
       __VA_ARGS__)
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #define REGISTER_OP_CUDA_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, CUDA, ::paddle::platform::CUDAPlace, __VA_ARGS__)
+#else
+#define REGISTER_OP_CUDA_KERNEL(op_type, ...)
+#endif
 
 #define REGISTER_OP_CPU_KERNEL(op_type, ...) \
   REGISTER_OP_KERNEL(op_type, CPU, ::paddle::platform::CPUPlace, __VA_ARGS__)
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 6e11c64afc..578d958ecc 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -115,9 +115,9 @@ set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence_padding sequence_scale cos_sim_fun
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions beam_search fc matrix_inverse)
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} box_wrapper boost ps_gpu_wrapper)
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} common_infer_shape_functions)
-set(COMMON_OP_DEPS ${COMMON_OP_DEPS} eigen_cc_function)
+set(COMMON_OP_DEPS ${COMMON_OP_DEPS} eigen_function)
 if (WITH_GPU OR WITH_ROCM)
-  set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu bert_encoder_functor eigen_cu_function)
+  set(COMMON_OP_DEPS ${COMMON_OP_DEPS} depthwise_conv prelu bert_encoder_functor)
 endif()
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} device_memory_aligment)
 set(COMMON_OP_DEPS ${COMMON_OP_DEPS} layer)
diff --git a/paddle/fluid/operators/benchmark/CMakeLists.txt b/paddle/fluid/operators/benchmark/CMakeLists.txt
index 54008336a9..e5023d8eb3 100644
--- a/paddle/fluid/operators/benchmark/CMakeLists.txt
+++ b/paddle/fluid/operators/benchmark/CMakeLists.txt
@@ -1,3 +1,3 @@
 cc_test(op_tester SRCS op_tester.cc op_tester_config.cc
         DEPS memory timer framework_proto proto_desc lod_tensor op_registry
-        device_context scope ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
+        device_context scope ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} eigen_function)
diff --git a/paddle/fluid/operators/conv_cudnn_helper.h b/paddle/fluid/operators/conv_cudnn_helper.h
index 9825fcd8a6..c7eac903a8 100644
--- a/paddle/fluid/operators/conv_cudnn_helper.h
+++ b/paddle/fluid/operators/conv_cudnn_helper.h
@@ -23,6 +23,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/conv_search_cache.h"
 #include "paddle/fluid/framework/operator_kernel_configs.h"
 #include "paddle/fluid/operators/conv_cudnn_op_cache.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/platform/cudnn_desc.h"
 namespace paddle {
 namespace operators {
@@ -58,8 +59,8 @@ static void RemovePaddingSlice(const framework::ExecutionContext& context,
       *context.template device_context<DeviceContext>().eigen_device();
   auto in_dims = input->dims();
   auto new_out_dims = out->dims();
-  auto offsets = Eigen::array<int, D>();
-  auto extents = Eigen::array<int, D>();
+  auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+  auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
   for (size_t i = 0; i < D; ++i) {
     offsets[i] = 0;
     extents[i] = new_out_dims[i];
@@ -81,7 +82,8 @@ static void RemovePaddingSlice(const framework::ExecutionContext& context,
   auto out_t =
       framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
           *out, new_out_dims);
-  out_t.device(place) = in_t.slice(offsets, extents);
+  EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(place, out_t, in_t,
+                                                        offsets, extents);
 }
 
 template <typename T>
diff --git a/paddle/fluid/operators/conv_transpose_op.h b/paddle/fluid/operators/conv_transpose_op.h
index ecf5b6d774..b8335c7506 100644
--- a/paddle/fluid/operators/conv_transpose_op.h
+++ b/paddle/fluid/operators/conv_transpose_op.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/conv_op.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/operators/math/depthwise_conv.h"
@@ -40,8 +41,8 @@ static void Slice(const framework::ExecutionContext& context,
   auto& place =
       *context.template device_context<DeviceContext>().eigen_device();
   auto in_dims = input->dims();
-  auto offsets = Eigen::array<int, D>();
-  auto extents = Eigen::array<int, D>();
+  auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+  auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
   for (size_t i = 0; i < D; ++i) {
     offsets[i] = 0;
     extents[i] = in_dims[i];
@@ -64,7 +65,8 @@ static void Slice(const framework::ExecutionContext& context,
       framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
           *out, out_dims);
 
-  out_t.device(place) = in_t.slice(offsets, extents);
+  EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(place, out_t, in_t,
+                                                        offsets, extents);
   out->Resize(out_dims);
 }
 
diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc
index 2031ed1424..193c0ca8dc 100644
--- a/paddle/fluid/operators/crop_op.cc
+++ b/paddle/fluid/operators/crop_op.cc
@@ -220,3 +220,10 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     crop_grad, ops::CropGradKernel<paddle::platform::CPUDeviceContext, float>,
     ops::CropGradKernel<paddle::platform::CPUDeviceContext, double>);
+
+REGISTER_OP_CUDA_KERNEL(
+    crop, ops::CropKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::CropKernel<paddle::platform::CUDADeviceContext, double>);
+REGISTER_OP_CUDA_KERNEL(
+    crop_grad, ops::CropGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::CropGradKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/crop_op.cu b/paddle/fluid/operators/crop_op.cu
deleted file mode 100644
index 0a83e6aa57..0000000000
--- a/paddle/fluid/operators/crop_op.cu
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/crop_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    crop, ops::CropKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::CropKernel<paddle::platform::CUDADeviceContext, double>);
-REGISTER_OP_CUDA_KERNEL(
-    crop_grad, ops::CropGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::CropGradKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/crop_op.h b/paddle/fluid/operators/crop_op.h
index 0338495096..f1fc216bd4 100644
--- a/paddle/fluid/operators/crop_op.h
+++ b/paddle/fluid/operators/crop_op.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
 
 namespace paddle {
@@ -89,15 +90,16 @@ void CropFunction(const framework::ExecutionContext& context) {
 
   auto x_tensor = EigenTensor<T, D>::From(*x);
   auto out_tensor = EigenTensor<T, D>::From(*out);
-  Eigen::array<int, D> e_offsets;
-  Eigen::array<int, D> e_shape;
+  Eigen::DSizes<Eigen::DenseIndex, D> e_offsets;
+  Eigen::DSizes<Eigen::DenseIndex, D> e_shape;
   for (size_t i = 0; i < D; ++i) {
     e_offsets[i] = offsets[i];
     e_shape[i] = out->dims()[i];
   }
   auto& place =
       *context.template device_context<DeviceContext>().eigen_device();
-  out_tensor.device(place) = x_tensor.slice(e_offsets, e_shape);
+  EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(
+      place, out_tensor, x_tensor, e_offsets, e_shape);
 }
 
 template <typename DeviceContext, typename T>
@@ -148,16 +150,17 @@ void CropGradFunction(const framework::ExecutionContext& context) {
     auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
     d_x->mutable_data<T>(x->dims(), context.GetPlace());
     auto offsets = GetOffsets(context);
-    Eigen::array<std::pair<int, int>, D> paddings;
+    Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
     for (size_t i = 0; i < D; ++i) {
       paddings[i].first = offsets[i];
       paddings[i].second = d_x->dims()[i] - d_out->dims()[i] - offsets[i];
     }
     auto d_x_tensor = EigenTensor<T, D>::From(*d_x);
     auto d_out_tensor = EigenTensor<T, D>::From(*d_out);
-    d_x_tensor.device(
-        *context.template device_context<DeviceContext>().eigen_device()) =
-        d_out_tensor.pad(paddings, 0);
+    auto& place =
+        *context.template device_context<DeviceContext>().eigen_device();
+    EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+        place, d_x_tensor, d_out_tensor, paddings, static_cast<T>(0));
   }
 }
 
diff --git a/paddle/fluid/operators/crop_tensor_op.cc b/paddle/fluid/operators/crop_tensor_op.cc
index 514333c57f..28238082b1 100644
--- a/paddle/fluid/operators/crop_tensor_op.cc
+++ b/paddle/fluid/operators/crop_tensor_op.cc
@@ -319,3 +319,16 @@ REGISTER_OP_CPU_KERNEL(
     ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, double>,
     ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::CropTensorGradKernel<paddle::platform::CPUDeviceContext, int64_t>);
+
+REGISTER_OP_CUDA_KERNEL(
+    crop_tensor,
+    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    crop_tensor_grad,
+    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/crop_tensor_op.cu b/paddle/fluid/operators/crop_tensor_op.cu
deleted file mode 100644
index c3a144d171..0000000000
--- a/paddle/fluid/operators/crop_tensor_op.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/crop_tensor_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    crop_tensor,
-    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::CropTensorKernel<paddle::platform::CUDADeviceContext, int64_t>);
-REGISTER_OP_CUDA_KERNEL(
-    crop_tensor_grad,
-    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::CropTensorGradKernel<paddle::platform::CUDADeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/crop_tensor_op.h b/paddle/fluid/operators/crop_tensor_op.h
index 58960465b9..54666c8482 100644
--- a/paddle/fluid/operators/crop_tensor_op.h
+++ b/paddle/fluid/operators/crop_tensor_op.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
 
 namespace paddle {
@@ -199,15 +200,16 @@ void CropTensorFunction(const framework::ExecutionContext& context) {
 
   auto x_tensor = EigenTensor<T, D>::From(*x);
   auto out_tensor = EigenTensor<T, D>::From(*out);
-  Eigen::array<int, D> e_offsets;
-  Eigen::array<int, D> e_shape;
+  Eigen::DSizes<Eigen::DenseIndex, D> e_offsets;
+  Eigen::DSizes<Eigen::DenseIndex, D> e_shape;
   for (size_t i = 0; i < D; ++i) {
     e_offsets[i] = offsets[i];
     e_shape[i] = out->dims()[i];
   }
   auto& place =
       *context.template device_context<DeviceContext>().eigen_device();
-  out_tensor.device(place) = x_tensor.slice(e_offsets, e_shape);
+  EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(
+      place, out_tensor, x_tensor, e_offsets, e_shape);
 }
 
 template <typename DeviceContext, typename T>
@@ -259,16 +261,17 @@ void CropTensorGradFunction(const framework::ExecutionContext& context) {
     auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
     d_x->mutable_data<T>(x->dims(), context.GetPlace());
     auto offsets = GetOffsets(context);
-    Eigen::array<std::pair<int, int>, D> paddings;
+    Eigen::array<std::pair<int64_t, int64_t>, D> paddings;
     for (size_t i = 0; i < D; ++i) {
       paddings[i].first = offsets[i];
       paddings[i].second = d_x->dims()[i] - d_out->dims()[i] - offsets[i];
     }
     auto d_x_tensor = EigenTensor<T, D>::From(*d_x);
     auto d_out_tensor = EigenTensor<T, D>::From(*d_out);
-    d_x_tensor.device(
-        *context.template device_context<DeviceContext>().eigen_device()) =
-        d_out_tensor.pad(paddings, 0);
+    auto& place =
+        *context.template device_context<DeviceContext>().eigen_device();
+    EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+        place, d_x_tensor, d_out_tensor, paddings, static_cast<T>(0));
   }
 }
 
diff --git a/paddle/fluid/operators/eigen/CMakeLists.txt b/paddle/fluid/operators/eigen/CMakeLists.txt
index 848bf2433c..8b64e35b93 100644
--- a/paddle/fluid/operators/eigen/CMakeLists.txt
+++ b/paddle/fluid/operators/eigen/CMakeLists.txt
@@ -1,10 +1,9 @@
 file(GLOB EIGEN_CC_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
-cc_library(eigen_cc_function SRCS ${EIGEN_CC_SOURCES} DEPS eigen3)
-if(WITH_GPU OR WITH_ROCM)
-  file(GLOB EIGEN_CU_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cu")
-  if(WITH_GPU)
-    nv_library(eigen_cu_function SRCS ${EIGEN_CU_SOURCES} DEPS eigen3)
-  elseif(WITH_ROCM)
-    hip_library(eigen_cu_function SRCS ${EIGEN_CU_SOURCES} DEPS eigen3)
-  endif()
+file(GLOB EIGEN_CU_SOURCES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cu")
+if(WITH_GPU)
+  nv_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3)
+elseif(WITH_ROCM)
+  hip_library(eigen_function SRCS ${EIGEN_CC_SOURCES} ${EIGEN_CU_SOURCES} DEPS eigen3)
+else()
+  cc_library(eigen_function SRCS ${EIGEN_CC_SOURCES} DEPS eigen3)
 endif()
diff --git a/paddle/fluid/operators/eigen/constant.cc b/paddle/fluid/operators/eigen/constant.cc
new file mode 100644
index 0000000000..45b03ccbf1
--- /dev/null
+++ b/paddle/fluid/operators/eigen/constant.cc
@@ -0,0 +1,31 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T, int Rank>
+struct EigenConstant<Eigen::DefaultDevice, T, Rank> {
+  using Type = Eigen::TensorMap<
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, Type out, const T value) {
+    out.device(dev) = out.constant(value);
+  }
+};
+
+template struct EigenConstant<Eigen::DefaultDevice, float, 1>;
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/constant.cu b/paddle/fluid/operators/eigen/constant.cu
new file mode 100644
index 0000000000..cf4a2917f7
--- /dev/null
+++ b/paddle/fluid/operators/eigen/constant.cu
@@ -0,0 +1,31 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T, int Rank>
+struct EigenConstant<Eigen::GpuDevice, T, Rank> {
+  using Type = Eigen::TensorMap<
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, Type out, const T value) {
+    out.device(dev) = out.constant(value);
+  }
+};
+
+template struct EigenConstant<Eigen::GpuDevice, float, 1>;
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/eigen_function.h b/paddle/fluid/operators/eigen/eigen_function.h
index 5966950595..8cbc7cd6ac 100644
--- a/paddle/fluid/operators/eigen/eigen_function.h
+++ b/paddle/fluid/operators/eigen/eigen_function.h
@@ -12,6 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#ifndef _USE_MATH_DEFINES
+#define _USE_MATH_DEFINES
+#endif
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
 #include "unsupported/Eigen/CXX11/Tensor"
 
 namespace paddle {
@@ -48,5 +54,187 @@ struct EigenBroadcastGrad {
                    const Array& reduce_dims, const Array2& reshape_dims);
 };
 
+template <typename EigenDevice, typename T, int Rank>
+struct EigenConstant {  // fills out with value; Eval is defined per device
+  using Type = Eigen::TensorMap<  // writable row-major view with Rank dims
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, Type out, const T value);
+};
+
+template <typename EigenDevice, typename T>  // presumably sign(in); confirm
+struct EigenSign {  // declaration only; Eval is defined outside this header
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in);
+};
+
+template <typename EigenDevice, typename T, int Rank>
+struct EigenReverse {  // declaration only; Eval is defined outside this header
+  using Array = Eigen::DSizes<bool, Rank>;  // one bool per dimension
+  using InType = Eigen::TensorMap<  // read-only row-major view, Rank dims
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<  // writable row-major view, Rank dims
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in,
+                   const Array& reverse);  // presumably true = flip that dim
+};
+
+template <typename EigenDevice, typename T>
+struct EigenAdd {  // out = in + value; Eval is defined per device
+  using InType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 input
+      const T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 output
+      T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in,
+                   const T value);
+};
+
+template <typename EigenDevice, typename T>
+struct EigenSub {  // out = left - right; Eval is defined per device
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& left,
+                   const InType& right);
+};
+
+template <typename EigenDevice, typename T, int Rank>
+struct EigenSlice {  // declaration only; Eval is defined outside this header
+  using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;  // DenseIndex indices
+  using Array32Bit = Eigen::DSizes<int, Rank>;  // int indices
+  using InType = Eigen::TensorMap<  // read-only row-major view, Rank dims
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using InType32BitIndex =  // same, but int-indexed and Eigen::Aligned
+      Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  using OutType = Eigen::TensorMap<  // writable row-major view, Rank dims
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType32BitIndex =  // same, but int-indexed and Eigen::Aligned
+      Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in,
+                   const Array& offsets, const Array& extents);
+  static void Eval(const EigenDevice& dev, OutType32BitIndex out,
+                   const InType32BitIndex& in, const Array32Bit& offsets,
+                   const Array32Bit& extents);  // int-index overload
+};
+
+template <typename EigenDevice, typename T, int Rank>
+struct EigenPad {  // out = in.pad(padding, value); Eval defined per device
+  using Array = std::array<std::pair<int64_t, int64_t>, Rank>;
+  using Array32Bit = std::array<std::pair<int, int>, Rank>;  // int variant
+  using InType = Eigen::TensorMap<  // read-only row-major view, Rank dims
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using InType32BitIndex =  // same, but int-indexed and Eigen::Aligned
+      Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  using OutType = Eigen::TensorMap<  // writable row-major view, Rank dims
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType32BitIndex =  // same, but int-indexed and Eigen::Aligned
+      Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in,
+                   const Array& padding, const T value);
+  static void Eval(const EigenDevice& dev, OutType32BitIndex out,
+                   const InType32BitIndex& in, const Array32Bit& padding,
+                   const T value);  // int-index overload
+};
+
+template <typename EigenDevice, typename T>  // presumably in*scale+bias
+struct EigenScale {  // declaration only; Eval is defined outside this header
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in,
+                   const T scale, const T bias, const bool bias_after_scale);
+};
+
+template <typename EigenDevice, typename T>
+struct EigenErf {  // out = erf(in); Eval is defined per device
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in);
+};
+
+template <typename EigenDevice, typename T>  // Eval is defined per device
+struct EigenErfGrad {  // din = dout * 2/sqrt(pi) * exp(-in^2)
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType din, const InType& in,
+                   const InType& dout);
+};
+
+template <typename EigenDevice, typename T>  // Eval is defined per device
+struct EigenRankLoss {  // log(1 + exp(left-right)) - label*(left-right)
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& label,
+                   const InType& left, const InType& right);
+};
+
+template <typename EigenDevice, typename T>  // defined per device
+struct EigenRankLossGrad {  // g = dout * (1 / (1 + exp(right-left)) - label)
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void EvalLeft(const EigenDevice& dev, OutType dleft,
+                       const InType& dout, const InType& label,
+                       const InType& left, const InType& right);  // g
+  static void EvalRight(const EigenDevice& dev, OutType dright,
+                        const InType& dout, const InType& label,
+                        const InType& left, const InType& right);  // -g
+};
+
+template <typename EigenDevice, typename T>  // Eval is defined per device
+struct EigenHingeLoss {  // loss = max(0, 1 - pred * (2 * label - 1))
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType loss, const InType& pred,
+                   const InType& label);
+};
+
+template <typename EigenDevice, typename T>  // with y = 2 * label - 1:
+struct EigenHingeLossGrad {  // dpred = dloss * (pred * y < 1) * (-y)
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType dpred, const InType& dloss,
+                   const InType& pred, const InType& label);
+};
+
+template <typename EigenDevice, typename T>
+struct EigenL1Norm {  // out = sum(|in|); Eval is defined per device
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 output
+      T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType out, const InType& in);
+};
+
+template <typename EigenDevice, typename T>  // Eval is defined per device
+struct EigenL1NormGrad {  // din = dout.broadcast(bcast) * sign(in)
+  using Array = Eigen::DSizes<Eigen::DenseIndex, 1>;  // broadcast factor
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const EigenDevice& dev, OutType din, const InType& dout,
+                   const InType& in, const Array& bcast);
+};
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/elementwise.cc b/paddle/fluid/operators/eigen/elementwise.cc
new file mode 100644
index 0000000000..bedecfe5c2
--- /dev/null
+++ b/paddle/fluid/operators/eigen/elementwise.cc
@@ -0,0 +1,51 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // CPU (DefaultDevice) specialization of EigenAdd
+struct EigenAdd<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 input
+      const T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 output
+      T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in, const T value) {
+    out.device(dev) = in + value;  // add the scalar to the rank-0 tensor
+  }
+};
+
+template struct EigenAdd<Eigen::DefaultDevice, float>;  // explicit CPU types
+template struct EigenAdd<Eigen::DefaultDevice, double>;
+template struct EigenAdd<Eigen::DefaultDevice, int>;
+template struct EigenAdd<Eigen::DefaultDevice, int64_t>;
+
+template <typename T>  // CPU (DefaultDevice) specialization of EigenSub
+struct EigenSub<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& left, const InType& right) {
+    out.device(dev) = left - right;  // elementwise difference
+  }
+};
+
+template struct EigenSub<Eigen::DefaultDevice, float>;  // float only
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/elementwise.cu b/paddle/fluid/operators/eigen/elementwise.cu
new file mode 100644
index 0000000000..a750a06284
--- /dev/null
+++ b/paddle/fluid/operators/eigen/elementwise.cu
@@ -0,0 +1,51 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // GpuDevice specialization of EigenAdd
+struct EigenAdd<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 input
+      const T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 output
+      T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
+                   const T value) {
+    out.device(dev) = in + value;  // add the scalar to the rank-0 tensor
+  }
+};
+
+template struct EigenAdd<Eigen::GpuDevice, float>;  // explicit GPU types
+template struct EigenAdd<Eigen::GpuDevice, double>;
+template struct EigenAdd<Eigen::GpuDevice, int>;
+template struct EigenAdd<Eigen::GpuDevice, int64_t>;
+
+template <typename T>  // GpuDevice specialization of EigenSub
+struct EigenSub<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& left,
+                   const InType& right) {
+    out.device(dev) = left - right;  // elementwise difference
+  }
+};
+
+template struct EigenSub<Eigen::GpuDevice, float>;  // float only
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/erf.cc b/paddle/fluid/operators/eigen/erf.cc
new file mode 100644
index 0000000000..6c2c734c97
--- /dev/null
+++ b/paddle/fluid/operators/eigen/erf.cc
@@ -0,0 +1,55 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/eigen_ext.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // CPU (DefaultDevice) specialization of EigenErf
+struct EigenErf<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in) {
+    out.device(dev) = in.erf();  // elementwise error function
+  }
+};
+
+template <typename T>  // CPU (DefaultDevice) specialization of EigenErfGrad
+struct EigenErfGrad<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType din,
+                   const InType& in, const InType& dout) {
+    din.device(dev) =  // dout * 2/sqrt(pi) * exp(-in^2)
+        dout * static_cast<T>(M_2_SQRTPI) * (-(in.square())).exp();
+  }
+};
+
+#define INSTANTIATION(FUNCTOR)                           \
+  template struct FUNCTOR<Eigen::DefaultDevice, float>;  \
+  template struct FUNCTOR<Eigen::DefaultDevice, double>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, platform::float16>
+INSTANTIATION(EigenErf);  // CPU: float, double, float16
+INSTANTIATION(EigenErfGrad);  // CPU: float, double, float16
+#undef INSTANTIATION  // helper macro is local to this file
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/erf.cu b/paddle/fluid/operators/eigen/erf.cu
new file mode 100644
index 0000000000..632205bdcb
--- /dev/null
+++ b/paddle/fluid/operators/eigen/erf.cu
@@ -0,0 +1,57 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifndef _USE_MATH_DEFINES
+#define _USE_MATH_DEFINES  // for M_2_SQRTPI; also set by eigen_function.h
+#endif
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/eigen_ext.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // GpuDevice specialization of EigenErf
+struct EigenErf<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in) {
+    out.device(dev) = in.erf();  // elementwise error function
+  }
+};
+
+template <typename T>  // GpuDevice specialization of EigenErfGrad
+struct EigenErfGrad<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType din, const InType& in,
+                   const InType& dout) {
+    din.device(dev) =  // dout * 2/sqrt(pi) * exp(-in^2)
+        dout * static_cast<T>(M_2_SQRTPI) * (-(in.square())).exp();
+  }
+};
+
+#define INSTANTIATION(FUNCTOR)                       \
+  template struct FUNCTOR<Eigen::GpuDevice, float>;  \
+  template struct FUNCTOR<Eigen::GpuDevice, double>; \
+  template struct FUNCTOR<Eigen::GpuDevice, platform::float16>
+INSTANTIATION(EigenErf);  // GPU: float, double, float16
+INSTANTIATION(EigenErfGrad);  // GPU: float, double, float16
+#undef INSTANTIATION  // helper macro is local to this file
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/l1_norm.cc b/paddle/fluid/operators/eigen/l1_norm.cc
new file mode 100644
index 0000000000..e7ed60f766
--- /dev/null
+++ b/paddle/fluid/operators/eigen/l1_norm.cc
@@ -0,0 +1,48 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // CPU (DefaultDevice) specialization of EigenL1Norm
+struct EigenL1Norm<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 output
+      T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in) {
+    out.device(dev) = in.abs().sum();  // sum of absolute values
+  }
+};
+
+template <typename T>  // CPU (DefaultDevice) specialization
+struct EigenL1NormGrad<Eigen::DefaultDevice, T> {
+  using Array = Eigen::DSizes<Eigen::DenseIndex, 1>;  // broadcast factor
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType din,
+                   const InType& dout, const InType& in, const Array& bcast) {
+    din.device(dev) = dout.broadcast(bcast) * in.sign();  // dout * sign(in)
+  }
+};
+
+template struct EigenL1Norm<Eigen::DefaultDevice, float>;  // float only
+template struct EigenL1NormGrad<Eigen::DefaultDevice, float>;  // float only
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/l1_norm.cu b/paddle/fluid/operators/eigen/l1_norm.cu
new file mode 100644
index 0000000000..a27cd7ae6b
--- /dev/null
+++ b/paddle/fluid/operators/eigen/l1_norm.cu
@@ -0,0 +1,47 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // GpuDevice specialization of EigenL1Norm
+struct EigenL1Norm<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<Eigen::TensorFixedSize<  // rank-0 output
+      T, Eigen::Sizes<>, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in) {
+    out.device(dev) = in.abs().sum();  // sum of absolute values
+  }
+};
+
+template <typename T>  // GpuDevice specialization of EigenL1NormGrad
+struct EigenL1NormGrad<Eigen::GpuDevice, T> {
+  using Array = Eigen::DSizes<Eigen::DenseIndex, 1>;  // broadcast factor
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType din, const InType& dout,
+                   const InType& in, const Array& bcast) {
+    din.device(dev) = dout.broadcast(bcast) * in.sign();  // dout * sign(in)
+  }
+};
+
+template struct EigenL1Norm<Eigen::GpuDevice, float>;  // float only
+template struct EigenL1NormGrad<Eigen::GpuDevice, float>;  // float only
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/loss.cc b/paddle/fluid/operators/eigen/loss.cc
new file mode 100644
index 0000000000..22a3647bc3
--- /dev/null
+++ b/paddle/fluid/operators/eigen/loss.cc
@@ -0,0 +1,90 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // CPU (DefaultDevice) specialization of EigenRankLoss
+struct EigenRankLoss<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& label, const InType& left,
+                   const InType& right) {  // log(1 + exp(d)) - label * d
+    out.device(dev) = (static_cast<T>(1) + (left - right).exp()).log() -
+                      label * (left - right);  // 1 is typed as T, not float
+  }
+};
+
+template <typename T>  // CPU (DefaultDevice) rank-loss gradient kernels
+struct EigenRankLossGrad<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void EvalLeft(const Eigen::DefaultDevice& dev, OutType dleft,
+                       const InType& dout, const InType& label,
+                       const InType& left, const InType& right) {
+    const T one = static_cast<T>(1);  // literal typed as T, not float
+    dleft.device(dev) = dout * (one / (one + (right - left).exp()) - label);
+  }
+  static void EvalRight(const Eigen::DefaultDevice& dev, OutType dright,
+                        const InType& dout, const InType& label,
+                        const InType& left, const InType& right) {
+    const T one = static_cast<T>(1);  // literal typed as T, not float
+    dright.device(dev) = -dout * (one / (one + (right - left).exp()) - label);
+  }
+};
+
+template struct EigenRankLoss<Eigen::DefaultDevice, float>;  // float only
+template struct EigenRankLossGrad<Eigen::DefaultDevice, float>;  // float only
+
+template <typename T>  // CPU (DefaultDevice) specialization of EigenHingeLoss
+struct EigenHingeLoss<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType loss,
+                   const InType& pred, const InType& label) {
+    loss.device(dev) = (static_cast<T>(1) -  // 1 - pred * (2 * label - 1)
+                        pred * (static_cast<T>(2) * label - static_cast<T>(1)))
+                           .cwiseMax(static_cast<T>(0));  // clamp below at 0
+  }
+};
+
+template <typename T>  // CPU (DefaultDevice) specialization
+struct EigenHingeLossGrad<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType dpred,
+                   const InType& dloss, const InType& pred,
+                   const InType& label) {
+    auto alt_labels = static_cast<T>(2) * label - static_cast<T>(1);
+    dpred.device(dev) =  // dloss * (pred * alt < 1) * (-alt)
+        dloss * ((pred * alt_labels) < static_cast<T>(1)).template cast<T>() *
+        (-alt_labels);  // alt_labels is a lazy expression, used twice
+  }
+};
+
+template struct EigenHingeLoss<Eigen::DefaultDevice, float>;  // float only
+template struct EigenHingeLossGrad<Eigen::DefaultDevice, float>;  // float only
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/loss.cu b/paddle/fluid/operators/eigen/loss.cu
new file mode 100644
index 0000000000..fac7e3370b
--- /dev/null
+++ b/paddle/fluid/operators/eigen/loss.cu
@@ -0,0 +1,90 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T>  // GpuDevice specialization of EigenRankLoss
+struct EigenRankLoss<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out,
+                   const InType& label, const InType& left,
+                   const InType& right) {  // log(1 + exp(d)) - label * d
+    out.device(dev) = (static_cast<T>(1) + (left - right).exp()).log() -
+                      label * (left - right);  // 1 is typed as T, not float
+  }
+};
+
+template <typename T>  // GpuDevice rank-loss gradient kernels
+struct EigenRankLossGrad<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void EvalLeft(const Eigen::GpuDevice& dev, OutType dleft,
+                       const InType& dout, const InType& label,
+                       const InType& left, const InType& right) {
+    const T one = static_cast<T>(1);  // literal typed as T, not float
+    dleft.device(dev) = dout * (one / (one + (right - left).exp()) - label);
+  }
+  static void EvalRight(const Eigen::GpuDevice& dev, OutType dright,
+                        const InType& dout, const InType& label,
+                        const InType& left, const InType& right) {
+    const T one = static_cast<T>(1);  // literal typed as T, not float
+    dright.device(dev) = -dout * (one / (one + (right - left).exp()) - label);
+  }
+};
+
+template struct EigenRankLoss<Eigen::GpuDevice, float>;  // float only
+template struct EigenRankLossGrad<Eigen::GpuDevice, float>;  // float only
+
+template <typename T>  // GpuDevice specialization of EigenHingeLoss
+struct EigenHingeLoss<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType loss,
+                   const InType& pred, const InType& label) {
+    loss.device(dev) = (static_cast<T>(1) -  // 1 - pred * (2 * label - 1)
+                        pred * (static_cast<T>(2) * label - static_cast<T>(1)))
+                           .cwiseMax(static_cast<T>(0));  // clamp below at 0
+  }
+};
+
+template <typename T>  // GpuDevice specialization of EigenHingeLossGrad
+struct EigenHingeLossGrad<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<  // read-only rank-1 row-major view
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =  // writable rank-1 row-major view
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType dpred,
+                   const InType& dloss, const InType& pred,
+                   const InType& label) {
+    auto alt_labels = static_cast<T>(2) * label - static_cast<T>(1);
+    dpred.device(dev) =  // dloss * (pred * alt < 1) * (-alt)
+        dloss * ((pred * alt_labels) < static_cast<T>(1)).template cast<T>() *
+        (-alt_labels);  // alt_labels is a lazy expression, used twice
+  }
+};
+
+template struct EigenHingeLoss<Eigen::GpuDevice, float>;  // float only
+template struct EigenHingeLossGrad<Eigen::GpuDevice, float>;  // float only
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/pad.cc b/paddle/fluid/operators/eigen/pad.cc
new file mode 100644
index 0000000000..72668bca9a
--- /dev/null
+++ b/paddle/fluid/operators/eigen/pad.cc
@@ -0,0 +1,64 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/complex128.h"
+#include "paddle/fluid/platform/complex64.h"
+
+namespace paddle {
+namespace operators {
+
+template <typename T, int Rank>  // CPU (DefaultDevice) EigenPad
+struct EigenPad<Eigen::DefaultDevice, T, Rank> {
+  using Array = std::array<std::pair<int64_t, int64_t>, Rank>;
+  using Array32Bit = std::array<std::pair<int, int>, Rank>;  // int variant
+  using InType = Eigen::TensorMap<  // read-only row-major view, Rank dims
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using InType32BitIndex =  // same, but int-indexed and Eigen::Aligned
+      Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  using OutType = Eigen::TensorMap<  // writable row-major view, Rank dims
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType32BitIndex =  // same, but int-indexed and Eigen::Aligned
+      Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in, const Array& padding, const T value) {
+    out.device(dev) = in.pad(padding, value);  // pad in, filling with value
+  }
+
+  static void Eval(const Eigen::DefaultDevice& dev, OutType32BitIndex out,
+                   const InType32BitIndex& in, const Array32Bit& padding,
+                   const T value) {  // int-index overload, same expression
+    out.device(dev) = in.pad(padding, value);
+  }
+};
+
+#define INSTANTIATION(FUNCTOR, TYPE)                      \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 1>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 2>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>
+INSTANTIATION(EigenPad, int);  // each call instantiates ranks 1 through 6
+INSTANTIATION(EigenPad, int64_t);
+INSTANTIATION(EigenPad, float);
+INSTANTIATION(EigenPad, double);
+INSTANTIATION(EigenPad, platform::complex64);
+INSTANTIATION(EigenPad, platform::complex128);
+#undef INSTANTIATION  // helper macro is local to this file
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/pad.cu b/paddle/fluid/operators/eigen/pad.cu
new file mode 100644
index 0000000000..1c936f886a
--- /dev/null
+++ b/paddle/fluid/operators/eigen/pad.cu
@@ -0,0 +1,66 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/complex128.h"
+#include "paddle/fluid/platform/complex64.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+namespace operators {
+// GpuDevice specialization: out = in.pad(padding, value), run on dev.
+template <typename T, int Rank>
+struct EigenPad<Eigen::GpuDevice, T, Rank> {
+  using Array = std::array<std::pair<int64_t, int64_t>, Rank>;
+  using Array32Bit = std::array<std::pair<int, int>, Rank>;
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using InType32BitIndex =
+      Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  using OutType = Eigen::TensorMap<
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType32BitIndex =
+      Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  // Overload for tensor maps indexed with Eigen::DenseIndex.
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
+                   const Array& padding, const T value) {
+    out.device(dev) = in.pad(padding, value);
+  }
+  // Overload for tensor maps indexed with int and marked Eigen::Aligned.
+  static void Eval(const Eigen::GpuDevice& dev, OutType32BitIndex out,
+                   const InType32BitIndex& in, const Array32Bit& padding,
+                   const T value) {
+    out.device(dev) = in.pad(padding, value);
+  }
+};
+// Explicitly instantiate EigenPad on GpuDevice for ranks 1 through 6.
+#define INSTANTIATION(FUNCTOR, TYPE)                  \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 1>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 2>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 3>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 4>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 5>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 6>
+INSTANTIATION(EigenPad, int);
+INSTANTIATION(EigenPad, int64_t);
+INSTANTIATION(EigenPad, float);
+INSTANTIATION(EigenPad, double);
+INSTANTIATION(EigenPad, platform::float16);
+INSTANTIATION(EigenPad, platform::complex64);
+INSTANTIATION(EigenPad, platform::complex128);
+#undef INSTANTIATION  // Helper macro is local to this file.
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/reverse.cc b/paddle/fluid/operators/eigen/reverse.cc
new file mode 100644
index 0000000000..02044479db
--- /dev/null
+++ b/paddle/fluid/operators/eigen/reverse.cc
@@ -0,0 +1,48 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+// DefaultDevice specialization: out = in.reverse(reverse), run on dev.
+template <typename T, int Rank>
+struct EigenReverse<Eigen::DefaultDevice, T, Rank> {
+  using Array = Eigen::DSizes<bool, Rank>;  // One bool per dimension.
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in, const Array& reverse) {
+    out.device(dev) = in.reverse(reverse);
+  }
+};
+// Explicitly instantiate EigenReverse on DefaultDevice for ranks 1 through 6.
+#define INSTANTIATION(FUNCTOR, TYPE)                      \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 1>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 2>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>
+INSTANTIATION(EigenReverse, int);
+INSTANTIATION(EigenReverse, uint8_t);
+INSTANTIATION(EigenReverse, int64_t);
+INSTANTIATION(EigenReverse, bool);
+INSTANTIATION(EigenReverse, float);
+INSTANTIATION(EigenReverse, double);
+#undef INSTANTIATION  // Helper macro is local to this file.
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/reverse.cu b/paddle/fluid/operators/eigen/reverse.cu
new file mode 100644
index 0000000000..9b769489ce
--- /dev/null
+++ b/paddle/fluid/operators/eigen/reverse.cu
@@ -0,0 +1,48 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+// GpuDevice specialization: out = in.reverse(reverse), run on dev.
+template <typename T, int Rank>
+struct EigenReverse<Eigen::GpuDevice, T, Rank> {
+  using Array = Eigen::DSizes<bool, Rank>;  // One bool per dimension.
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType = Eigen::TensorMap<
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
+                   const Array& reverse) {
+    out.device(dev) = in.reverse(reverse);
+  }
+};
+// Explicitly instantiate EigenReverse on GpuDevice for ranks 1 through 6.
+#define INSTANTIATION(FUNCTOR, TYPE)                  \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 1>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 2>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 3>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 4>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 5>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 6>
+INSTANTIATION(EigenReverse, int);
+INSTANTIATION(EigenReverse, uint8_t);
+INSTANTIATION(EigenReverse, int64_t);
+INSTANTIATION(EigenReverse, bool);
+INSTANTIATION(EigenReverse, float);
+INSTANTIATION(EigenReverse, double);
+#undef INSTANTIATION  // Helper macro is local to this file.
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/scale.cc b/paddle/fluid/operators/eigen/scale.cc
new file mode 100644
index 0000000000..e85878f20a
--- /dev/null
+++ b/paddle/fluid/operators/eigen/scale.cc
@@ -0,0 +1,47 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/bfloat16.h"
+
+namespace paddle {
+namespace operators {
+// DefaultDevice specialization of EigenScale over rank-1 tensor maps.
+template <typename T>
+struct EigenScale<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in, const T scale, const T bias,
+                   const bool bias_after_scale) {
+    if (bias_after_scale) {
+      out.device(dev) = scale * in + bias;  // Scale first, then add bias.
+    } else {
+      out.device(dev) = scale * (in + bias);  // Add bias first, then scale.
+    }
+  }
+};
+// Explicit instantiations of EigenScale on DefaultDevice, one per type.
+template struct EigenScale<Eigen::DefaultDevice, float>;
+template struct EigenScale<Eigen::DefaultDevice, double>;
+template struct EigenScale<Eigen::DefaultDevice, platform::bfloat16>;
+template struct EigenScale<Eigen::DefaultDevice, uint8_t>;
+template struct EigenScale<Eigen::DefaultDevice, int8_t>;
+template struct EigenScale<Eigen::DefaultDevice, int16_t>;
+template struct EigenScale<Eigen::DefaultDevice, int>;
+template struct EigenScale<Eigen::DefaultDevice, int64_t>;
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/scale.cu b/paddle/fluid/operators/eigen/scale.cu
new file mode 100644
index 0000000000..6a77f72f62
--- /dev/null
+++ b/paddle/fluid/operators/eigen/scale.cu
@@ -0,0 +1,46 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+namespace operators {
+// GpuDevice specialization of EigenScale over rank-1 tensor maps.
+template <typename T>
+struct EigenScale<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
+                   const T scale, const T bias, const bool bias_after_scale) {
+    if (bias_after_scale) {
+      out.device(dev) = scale * in + bias;  // Scale first, then add bias.
+    } else {
+      out.device(dev) = scale * (in + bias);  // Add bias first, then scale.
+    }
+  }
+};
+// Explicit instantiations of EigenScale on GpuDevice, one per type.
+template struct EigenScale<Eigen::GpuDevice, float>;
+template struct EigenScale<Eigen::GpuDevice, double>;
+template struct EigenScale<Eigen::GpuDevice, uint8_t>;
+template struct EigenScale<Eigen::GpuDevice, int8_t>;
+template struct EigenScale<Eigen::GpuDevice, int16_t>;
+template struct EigenScale<Eigen::GpuDevice, int>;
+template struct EigenScale<Eigen::GpuDevice, int64_t>;
+template struct EigenScale<Eigen::GpuDevice, platform::float16>;
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/sign.cc b/paddle/fluid/operators/eigen/sign.cc
new file mode 100644
index 0000000000..4a4445f656
--- /dev/null
+++ b/paddle/fluid/operators/eigen/sign.cc
@@ -0,0 +1,35 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+
+namespace paddle {
+namespace operators {
+// DefaultDevice specialization: out = in.sign() over rank-1 tensor maps.
+template <typename T>
+struct EigenSign<Eigen::DefaultDevice, T> {
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in) {
+    out.device(dev) = in.sign();
+  }
+};
+// Explicit instantiations of EigenSign on DefaultDevice, one per type.
+template struct EigenSign<Eigen::DefaultDevice, float>;
+template struct EigenSign<Eigen::DefaultDevice, double>;
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/sign.cu b/paddle/fluid/operators/eigen/sign.cu
new file mode 100644
index 0000000000..52c8d3c80d
--- /dev/null
+++ b/paddle/fluid/operators/eigen/sign.cu
@@ -0,0 +1,37 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/eigen_ext.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+namespace operators {
+// GpuDevice specialization: out = in.sign() over rank-1 tensor maps.
+template <typename T>
+struct EigenSign<Eigen::GpuDevice, T> {
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType =
+      Eigen::TensorMap<Eigen::Tensor<T, 1, Eigen::RowMajor, Eigen::DenseIndex>>;
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in) {
+    out.device(dev) = in.sign();
+  }
+};
+// Explicit instantiations of EigenSign on GpuDevice, one per type.
+template struct EigenSign<Eigen::GpuDevice, float>;
+template struct EigenSign<Eigen::GpuDevice, double>;
+template struct EigenSign<Eigen::GpuDevice, platform::float16>;
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/slice.cc b/paddle/fluid/operators/eigen/slice.cc
new file mode 100644
index 0000000000..240b4249ff
--- /dev/null
+++ b/paddle/fluid/operators/eigen/slice.cc
@@ -0,0 +1,79 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/bfloat16.h"
+#include "paddle/fluid/platform/complex.h"
+#include "paddle/fluid/platform/complex128.h"
+#include "paddle/fluid/platform/complex64.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+namespace operators {
+// DefaultDevice specialization: out = in.slice(offsets, extents) on dev.
+template <typename T, int Rank>
+struct EigenSlice<Eigen::DefaultDevice, T, Rank> {
+  using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;
+  using Array32Bit = Eigen::DSizes<int, Rank>;
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using InType32BitIndex =
+      Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  using OutType = Eigen::TensorMap<
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType32BitIndex =
+      Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  // Overload for tensor maps indexed with Eigen::DenseIndex.
+  static void Eval(const Eigen::DefaultDevice& dev, OutType out,
+                   const InType& in, const Array& offsets,
+                   const Array& extents) {
+    out.device(dev) = in.slice(offsets, extents);
+  }
+  // Overload for tensor maps indexed with int and marked Eigen::Aligned.
+  static void Eval(const Eigen::DefaultDevice& dev, OutType32BitIndex out,
+                   const InType32BitIndex& in, const Array32Bit& offsets,
+                   const Array32Bit& extents) {
+    out.device(dev) = in.slice(offsets, extents);
+  }
+};
+// Explicitly instantiate EigenSlice on DefaultDevice for ranks 1 through 9.
+#define INSTANTIATION(FUNCTOR, TYPE)                      \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 1>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 2>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 3>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 4>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 5>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 6>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 7>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 8>; \
+  template struct FUNCTOR<Eigen::DefaultDevice, TYPE, 9>
+INSTANTIATION(EigenSlice, bool);
+INSTANTIATION(EigenSlice, int);
+INSTANTIATION(EigenSlice, int8_t);
+INSTANTIATION(EigenSlice, uint8_t);
+INSTANTIATION(EigenSlice, int16_t);
+INSTANTIATION(EigenSlice, int64_t);
+INSTANTIATION(EigenSlice, float);
+INSTANTIATION(EigenSlice, double);
+INSTANTIATION(EigenSlice, platform::float16);
+INSTANTIATION(EigenSlice, platform::bfloat16);
+INSTANTIATION(EigenSlice, platform::complex64);
+INSTANTIATION(EigenSlice, platform::complex128);
+INSTANTIATION(EigenSlice, platform::complex<float>);
+INSTANTIATION(EigenSlice, platform::complex<double>);
+#undef INSTANTIATION  // Helper macro is local to this file.
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/eigen/slice.cu b/paddle/fluid/operators/eigen/slice.cu
new file mode 100644
index 0000000000..91c4a29f4a
--- /dev/null
+++ b/paddle/fluid/operators/eigen/slice.cu
@@ -0,0 +1,66 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/eigen/eigen_function.h"
+#include "paddle/fluid/platform/complex128.h"
+#include "paddle/fluid/platform/complex64.h"
+#include "paddle/fluid/platform/float16.h"
+
+namespace paddle {
+namespace operators {
+// GpuDevice specialization: out = in.slice(offsets, extents) on dev.
+template <typename T, int Rank>
+struct EigenSlice<Eigen::GpuDevice, T, Rank> {
+  using Array = Eigen::DSizes<Eigen::DenseIndex, Rank>;
+  using Array32Bit = Eigen::DSizes<int, Rank>;
+  using InType = Eigen::TensorMap<
+      Eigen::Tensor<const T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using InType32BitIndex =
+      Eigen::TensorMap<Eigen::Tensor<const T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  using OutType = Eigen::TensorMap<
+      Eigen::Tensor<T, Rank, Eigen::RowMajor, Eigen::DenseIndex>>;
+  using OutType32BitIndex =
+      Eigen::TensorMap<Eigen::Tensor<T, Rank, Eigen::RowMajor, int>,
+                       Eigen::Aligned>;
+  // Overload for tensor maps indexed with Eigen::DenseIndex.
+  static void Eval(const Eigen::GpuDevice& dev, OutType out, const InType& in,
+                   const Array& offsets, const Array& extents) {
+    out.device(dev) = in.slice(offsets, extents);
+  }
+  // Overload for tensor maps indexed with int and marked Eigen::Aligned.
+  static void Eval(const Eigen::GpuDevice& dev, OutType32BitIndex out,
+                   const InType32BitIndex& in, const Array32Bit& offsets,
+                   const Array32Bit& extents) {
+    out.device(dev) = in.slice(offsets, extents);
+  }
+};
+// GpuDevice instantiations, ranks 1-6. NOTE(review): slice.cc covers 1-9.
+#define INSTANTIATION(FUNCTOR, TYPE)                  \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 1>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 2>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 3>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 4>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 5>; \
+  template struct FUNCTOR<Eigen::GpuDevice, TYPE, 6>
+INSTANTIATION(EigenSlice, int);
+INSTANTIATION(EigenSlice, int64_t);
+INSTANTIATION(EigenSlice, float);
+INSTANTIATION(EigenSlice, double);
+INSTANTIATION(EigenSlice, platform::float16);
+INSTANTIATION(EigenSlice, platform::complex64);
+INSTANTIATION(EigenSlice, platform::complex128);
+#undef INSTANTIATION  // Helper macro is local to this file.
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/erf_op.cc b/paddle/fluid/operators/erf_op.cc
index 09cdf4d8b2..f68f670394 100644
--- a/paddle/fluid/operators/erf_op.cc
+++ b/paddle/fluid/operators/erf_op.cc
@@ -130,3 +130,14 @@ REGISTER_OP_CPU_KERNEL(
     ops::ErfGradKernel<paddle::platform::CPUDeviceContext, double>,
     ops::ErfGradKernel<paddle::platform::CPUDeviceContext,
                        paddle::platform::float16>);
+// CUDA kernels for erf and erf_grad: float, double and float16.
+REGISTER_OP_CUDA_KERNEL(
+    erf, ops::ErfKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ErfKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ErfKernel<paddle::platform::CUDADeviceContext,
+                   paddle::platform::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    erf_grad, ops::ErfGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ErfGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ErfGradKernel<paddle::platform::CUDADeviceContext,
+                       paddle::platform::float16>);
diff --git a/paddle/fluid/operators/erf_op.cu b/paddle/fluid/operators/erf_op.cu
deleted file mode 100644
index 357b9e79c4..0000000000
--- a/paddle/fluid/operators/erf_op.cu
+++ /dev/null
@@ -1,28 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/erf_op.h"
-#include "paddle/fluid/platform/float16.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    erf, ops::ErfKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ErfKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ErfKernel<paddle::platform::CUDADeviceContext,
-                   paddle::platform::float16>);
-REGISTER_OP_CUDA_KERNEL(
-    erf_grad, ops::ErfGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ErfGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::ErfGradKernel<paddle::platform::CUDADeviceContext,
-                       paddle::platform::float16>);
diff --git a/paddle/fluid/operators/erf_op.h b/paddle/fluid/operators/erf_op.h
index 08c827df95..4780b2e7f5 100644
--- a/paddle/fluid/operators/erf_op.h
+++ b/paddle/fluid/operators/erf_op.h
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <cmath>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -35,7 +36,8 @@ class ErfKernel : public framework::OpKernel<T> {
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(place) = eigen_in.erf();
+    EigenErf<std::decay_t<decltype(place)>, T>::Eval(place, eigen_out,
+                                                     eigen_in);
   }
 };
 
@@ -55,8 +57,8 @@ class ErfGradKernel : public framework::OpKernel<T> {
     auto eigen_dx = framework::EigenVector<T>::Flatten(*dx);
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    eigen_dx.device(place) =
-        eigen_dout * static_cast<T>(M_2_SQRTPI) * (-(eigen_x.square())).exp();
+    EigenErfGrad<std::decay_t<decltype(place)>, T>::Eval(place, eigen_dx,
+                                                         eigen_x, eigen_dout);
   }
 };
 
diff --git a/paddle/fluid/operators/hinge_loss_op.cc b/paddle/fluid/operators/hinge_loss_op.cc
index e60b1538ee..cce8051835 100644
--- a/paddle/fluid/operators/hinge_loss_op.cc
+++ b/paddle/fluid/operators/hinge_loss_op.cc
@@ -143,3 +143,10 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     hinge_loss_grad,
     ops::HingeLossGradKernel<paddle::platform::CPUDeviceContext, float>);
+// CUDA kernels for hinge_loss and hinge_loss_grad, registered for float.
+REGISTER_OP_CUDA_KERNEL(
+    hinge_loss,
+    ops::HingeLossKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    hinge_loss_grad,
+    ops::HingeLossGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/hinge_loss_op.cu b/paddle/fluid/operators/hinge_loss_op.cu
deleted file mode 100644
index b5ea0a702e..0000000000
--- a/paddle/fluid/operators/hinge_loss_op.cu
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/hinge_loss_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    hinge_loss,
-    ops::HingeLossKernel<paddle::platform::CUDADeviceContext, float>);
-REGISTER_OP_CUDA_KERNEL(
-    hinge_loss_grad,
-    ops::HingeLossGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/hinge_loss_op.h b/paddle/fluid/operators/hinge_loss_op.h
index 10c17a0982..c78eddd252 100644
--- a/paddle/fluid/operators/hinge_loss_op.h
+++ b/paddle/fluid/operators/hinge_loss_op.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -33,9 +34,7 @@ class HingeLossKernel : public framework::OpKernel<T> {
     auto y = framework::EigenVector<T>::Flatten(*label);
     loss->mutable_data<T>(context.GetPlace());
     auto l = framework::EigenVector<T>::Flatten(*loss);
-    l.device(place) =
-        (static_cast<T>(1) - x * (static_cast<T>(2) * y - static_cast<T>(1)))
-            .cwiseMax(static_cast<T>(0));
+    EigenHingeLoss<std::decay_t<decltype(place)>, T>::Eval(place, l, x, y);
   }
 };
 
@@ -59,10 +58,8 @@ class HingeLossGradKernel : public framework::OpKernel<T> {
     if (dpred) {
       dpred->mutable_data<T>(context.GetPlace());
       auto dx = framework::EigenVector<T>::Flatten(*dpred);
-      auto alt_labels = static_cast<T>(2) * y - static_cast<T>(1);
-      dx.device(place) =
-          dl * ((x * alt_labels) < static_cast<T>(1)).template cast<T>() *
-          (-alt_labels);
+      EigenHingeLossGrad<std::decay_t<decltype(place)>, T>::Eval(place, dx, dl,
+                                                                 x, y);
     }
   }
 };
diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc
index b973d5d9d8..d248857b8f 100644
--- a/paddle/fluid/operators/im2sequence_op.cc
+++ b/paddle/fluid/operators/im2sequence_op.cc
@@ -192,3 +192,10 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     im2sequence_grad,
     ops::Im2SequenceGradKernel<paddle::platform::CPUDeviceContext, float>);
+// CUDA kernels for im2sequence and im2sequence_grad, registered for float.
+REGISTER_OP_CUDA_KERNEL(
+    im2sequence,
+    ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    im2sequence_grad,
+    ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/im2sequence_op.cu b/paddle/fluid/operators/im2sequence_op.cu
deleted file mode 100644
index 1c34640618..0000000000
--- a/paddle/fluid/operators/im2sequence_op.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-   http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License. */
-#include "paddle/fluid/operators/im2sequence_op.h"
-
-namespace ops = paddle::operators;
-
-REGISTER_OP_CUDA_KERNEL(
-    im2sequence,
-    ops::Im2SequenceKernel<paddle::platform::CUDADeviceContext, float>);
-REGISTER_OP_CUDA_KERNEL(
-    im2sequence_grad,
-    ops::Im2SequenceGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h
index 9c9069b722..760d6a63de 100644
--- a/paddle/fluid/operators/im2sequence_op.h
+++ b/paddle/fluid/operators/im2sequence_op.h
@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/data_layout.h"
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/im2col.h"
 #include "paddle/fluid/operators/math/math_function.h"
 
@@ -157,7 +158,7 @@ class Im2SequenceGradKernel : public framework::OpKernel<T> {
 
     auto x_v = framework::EigenVector<T>::Flatten(*d_x);
     auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
-    x_v.device(place) = x_v.constant(0.0);
+    EigenConstant<std::decay_t<decltype(place)>, T, 1>::Eval(place, x_v, 0.0);
 
     auto in_dim = in->dims();
     int batch_size = in_dim[0];
diff --git a/paddle/fluid/operators/increment_op.cc b/paddle/fluid/operators/increment_op.cc
index e8edfb99f9..e727f6ceb5 100644
--- a/paddle/fluid/operators/increment_op.cc
+++ b/paddle/fluid/operators/increment_op.cc
@@ -107,3 +107,9 @@ REGISTER_OP_CPU_KERNEL(
     ops::IncrementKernel<paddle::platform::CPUDeviceContext, double>,
     ops::IncrementKernel<paddle::platform::CPUDeviceContext, int>,
     ops::IncrementKernel<paddle::platform::CPUDeviceContext, int64_t>);
+// CUDA kernels for increment: float, double, int and int64_t.
+REGISTER_OP_CUDA_KERNEL(
+    increment, ops::IncrementKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::IncrementKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::IncrementKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::IncrementKernel<paddle::platform::CUDADeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/increment_op.cu b/paddle/fluid/operators/increment_op.cu
deleted file mode 100644
index 228063bf3d..0000000000
--- a/paddle/fluid/operators/increment_op.cu
+++ /dev/null
@@ -1,22 +0,0 @@
-//   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/increment_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    increment, ops::IncrementKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::IncrementKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::IncrementKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::IncrementKernel<paddle::platform::CUDADeviceContext, int64_t>);
diff --git a/paddle/fluid/operators/increment_op.h b/paddle/fluid/operators/increment_op.h
index d0e8c66255..4b9d071464 100644
--- a/paddle/fluid/operators/increment_op.h
+++ b/paddle/fluid/operators/increment_op.h
@@ -15,6 +15,7 @@
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -30,8 +31,9 @@ class IncrementKernel : public framework::OpKernel<T> {
     out_tensor->mutable_data<T>(context.GetPlace());
     auto& dev =
         *context.template device_context<DeviceContext>().eigen_device();
-    framework::EigenScalar<T>::From(*out_tensor).device(dev) =
-        framework::EigenScalar<T>::From(*x_tensor) + static_cast<T>(step);
+    EigenAdd<std::decay_t<decltype(dev)>, T>::Eval(
+        dev, framework::EigenScalar<T>::From(*out_tensor),
+        framework::EigenScalar<T>::From(*x_tensor), static_cast<T>(step));
   }
 };
 
diff --git a/paddle/fluid/operators/l1_norm_op.cc b/paddle/fluid/operators/l1_norm_op.cc
index e8f83f6b62..ddd0554add 100644
--- a/paddle/fluid/operators/l1_norm_op.cc
+++ b/paddle/fluid/operators/l1_norm_op.cc
@@ -91,3 +91,9 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     l1_norm_grad,
     ops::L1NormGradKernel<paddle::platform::CPUDeviceContext, float>);
+
+REGISTER_OP_CUDA_KERNEL(
+    l1_norm, ops::L1NormKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    l1_norm_grad,
+    ops::L1NormGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/l1_norm_op.cu b/paddle/fluid/operators/l1_norm_op.cu
deleted file mode 100644
index a5c29bbf5d..0000000000
--- a/paddle/fluid/operators/l1_norm_op.cu
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/l1_norm_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    l1_norm, ops::L1NormKernel<paddle::platform::CUDADeviceContext, float>);
-REGISTER_OP_CUDA_KERNEL(
-    l1_norm_grad,
-    ops::L1NormGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/l1_norm_op.h b/paddle/fluid/operators/l1_norm_op.h
index c2a302ed05..918526914d 100644
--- a/paddle/fluid/operators/l1_norm_op.h
+++ b/paddle/fluid/operators/l1_norm_op.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -33,7 +34,7 @@ class L1NormKernel : public framework::OpKernel<T> {
     auto &place =
         *context.template device_context<DeviceContext>().eigen_device();
 
-    out.device(place) = x.abs().sum();
+    EigenL1Norm<std::decay_t<decltype(place)>, T>::Eval(place, out, x);
   }
 };
 
@@ -59,8 +60,9 @@ class L1NormGradKernel : public framework::OpKernel<T> {
     auto &place =
         *context.template device_context<DeviceContext>().eigen_device();
 
-    Eigen::DSizes<int, 1> x_dsize(x->numel());
-    dx_eigen.device(place) = d_out_eigen.broadcast(x_dsize) * x_eigen.sign();
+    Eigen::DSizes<Eigen::DenseIndex, 1> x_dsize(x->numel());
+    EigenL1NormGrad<std::decay_t<decltype(place)>, T>::Eval(
+        place, dx_eigen, d_out_eigen, x_eigen, x_dsize);
   }
 };
 
diff --git a/paddle/fluid/operators/math/padding.h b/paddle/fluid/operators/math/padding.h
index 379b21c3c1..529d39c9ba 100644
--- a/paddle/fluid/operators/math/padding.h
+++ b/paddle/fluid/operators/math/padding.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/tensor.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -29,7 +30,7 @@ template <typename DeviceContext, typename T, size_t D>
 void PadFunction(const framework::ExecutionContext& context,
                  const std::vector<int>& pads, const framework::Tensor& src,
                  T pad_value, framework::Tensor* out) {
-  Eigen::array<std::pair<int, int>, D> paddings;
+  std::array<std::pair<int64_t, int64_t>, D> paddings;
 
   for (size_t i = 0; i < paddings.size(); ++i) {
     paddings[i].first = pads[i * 2];
@@ -41,14 +42,15 @@ void PadFunction(const framework::ExecutionContext& context,
 
   auto& place =
       *context.template device_context<DeviceContext>().eigen_device();
-  out_tensor.device(place) = src_tensor.pad(paddings, pad_value);
+  EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+      place, out_tensor, src_tensor, paddings, pad_value);
 }
 
 template <typename DeviceContext, typename T, size_t D>
 void PadGradFunction(const framework::ExecutionContext& context,
                      const std::vector<int>& pads, const framework::Tensor& src,
                      framework::Tensor* d_out) {
-  Eigen::array<std::pair<int, int>, D> paddings;
+  std::array<std::pair<int64_t, int64_t>, D> paddings;
   for (size_t i = 0; i < paddings.size(); ++i) {
     paddings[i].first = -pads[i * 2];
     paddings[i].second = -pads[i * 2 + 1];
@@ -58,7 +60,8 @@ void PadGradFunction(const framework::ExecutionContext& context,
   auto src_tensor = EigenTensor<T, D>::From(src);
   auto& place =
       *context.template device_context<DeviceContext>().eigen_device();
-  d_out_tensor.device(place) = src_tensor.pad(paddings, static_cast<T>(0));
+  EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+      place, d_out_tensor, src_tensor, paddings, static_cast<T>(0));
 }
 
 template <typename DeviceContext, typename T>
diff --git a/paddle/fluid/operators/minus_op.cc b/paddle/fluid/operators/minus_op.cc
index 5b14d4f687..743a61c744 100644
--- a/paddle/fluid/operators/minus_op.cc
+++ b/paddle/fluid/operators/minus_op.cc
@@ -146,3 +146,6 @@ REGISTER_OPERATOR(minus, ops::MinusOp, ops::MinusOpMaker,
                   ops::MinusGradDescMaker, ops::MinusGradMaker);
 REGISTER_OP_CPU_KERNEL(
     minus, ops::MinusKernel<paddle::platform::CPUDeviceContext, float>);
+
+REGISTER_OP_CUDA_KERNEL(
+    minus, ops::MinusKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/minus_op.cu b/paddle/fluid/operators/minus_op.cu
deleted file mode 100644
index 956d935da9..0000000000
--- a/paddle/fluid/operators/minus_op.cu
+++ /dev/null
@@ -1,19 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/minus_op.h"
-
-REGISTER_OP_CUDA_KERNEL(
-    minus,
-    paddle::operators::MinusKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/minus_op.h b/paddle/fluid/operators/minus_op.h
index 7791b1456a..2300506c62 100644
--- a/paddle/fluid/operators/minus_op.h
+++ b/paddle/fluid/operators/minus_op.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -30,9 +31,10 @@ class MinusKernel : public framework::OpKernel<T> {
     out_tensor->mutable_data<T>(context.GetPlace());
     auto& dev =
         *context.template device_context<DeviceContext>().eigen_device();
-    framework::EigenVector<T>::Flatten(*out_tensor).device(dev) =
-        framework::EigenVector<T>::Flatten(*left_tensor) -
-        framework::EigenVector<T>::Flatten(*right_tensor);
+    EigenSub<std::decay_t<decltype(dev)>, T>::Eval(
+        dev, framework::EigenVector<T>::Flatten(*out_tensor),
+        framework::EigenVector<T>::Flatten(*left_tensor),
+        framework::EigenVector<T>::Flatten(*right_tensor));
   }
 };
 
diff --git a/paddle/fluid/operators/pad_constant_like_op.cc b/paddle/fluid/operators/pad_constant_like_op.cc
index 95aaed4453..087b8ecba6 100644
--- a/paddle/fluid/operators/pad_constant_like_op.cc
+++ b/paddle/fluid/operators/pad_constant_like_op.cc
@@ -246,3 +246,18 @@ REGISTER_OP_CPU_KERNEL(
     ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext, int>,
     ops::PadConstantLikeGradKernel<paddle::platform::CPUDeviceContext,
                                    int64_t>);
+
+REGISTER_OP_CUDA_KERNEL(
+    pad_constant_like,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int64_t>);
+REGISTER_OP_CUDA_KERNEL(
+    pad_constant_like_grad,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
+                                   int64_t>,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
+                                   double>);
diff --git a/paddle/fluid/operators/pad_constant_like_op.cu b/paddle/fluid/operators/pad_constant_like_op.cu
deleted file mode 100644
index 76faf30ed9..0000000000
--- a/paddle/fluid/operators/pad_constant_like_op.cu
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/pad_constant_like_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    pad_constant_like,
-    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::PadConstantLikeKernel<paddle::platform::CUDADeviceContext, int64_t>);
-REGISTER_OP_CUDA_KERNEL(
-    pad_constant_like_grad,
-    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
-                                   int64_t>,
-    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::PadConstantLikeGradKernel<paddle::platform::CUDADeviceContext,
-                                   double>);
diff --git a/paddle/fluid/operators/pad_op.cc b/paddle/fluid/operators/pad_op.cc
index 577f4f3941..3bf66c77ba 100644
--- a/paddle/fluid/operators/pad_op.cc
+++ b/paddle/fluid/operators/pad_op.cc
@@ -174,3 +174,16 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     pad_grad, ops::PadGradKernel<paddle::platform::CPUDeviceContext, float>,
     ops::PadGradKernel<paddle::platform::CPUDeviceContext, double>);
+
+REGISTER_OP_CUDA_KERNEL(
+    pad, ops::PadKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::PadKernel<paddle::platform::CUDADeviceContext,
+                   paddle::platform::float16>);
+REGISTER_OP_CUDA_KERNEL(
+    pad_grad, ops::PadGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::PadGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::PadGradKernel<paddle::platform::CUDADeviceContext,
+                       paddle::platform::float16>);
diff --git a/paddle/fluid/operators/pad_op.cu b/paddle/fluid/operators/pad_op.cu
deleted file mode 100644
index 391e305352..0000000000
--- a/paddle/fluid/operators/pad_op.cu
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-#include "paddle/fluid/operators/pad_op.h"
-
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-REGISTER_OP_CUDA_KERNEL(
-    pad, ops::PadKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::PadKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::PadKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::PadKernel<paddle::platform::CUDADeviceContext, int64_t>,
-    ops::PadKernel<paddle::platform::CUDADeviceContext, plat::float16>);
-REGISTER_OP_CUDA_KERNEL(
-    pad_grad, ops::PadGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::PadGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::PadGradKernel<paddle::platform::CUDADeviceContext, plat::float16>);
diff --git a/paddle/fluid/operators/rank_loss_op.cc b/paddle/fluid/operators/rank_loss_op.cc
index ec9d1fde45..01f5b4c732 100644
--- a/paddle/fluid/operators/rank_loss_op.cc
+++ b/paddle/fluid/operators/rank_loss_op.cc
@@ -231,3 +231,10 @@ REGISTER_OP_CPU_KERNEL(
 REGISTER_OP_CPU_KERNEL(
     rank_loss_grad,
     ops::RankLossGradKernel<paddle::platform::CPUDeviceContext, float>);
+
+// GPU (CUDADeviceContext) kernels for rank_loss and its gradient.
+REGISTER_OP_CUDA_KERNEL(
+    rank_loss, ops::RankLossKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    rank_loss_grad,
+    ops::RankLossGradKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/rank_loss_op.cu b/paddle/fluid/operators/rank_loss_op.cu
deleted file mode 100644
index ed80527989..0000000000
--- a/paddle/fluid/operators/rank_loss_op.cu
+++ /dev/null
@@ -1,22 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/rank_loss_op.h"
-
-REGISTER_OP_CUDA_KERNEL(rank_loss,
-                        paddle::operators::RankLossKernel<
-                            paddle::platform::CUDADeviceContext, float>);
-REGISTER_OP_CUDA_KERNEL(rank_loss_grad,
-                        paddle::operators::RankLossGradKernel<
-                            paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/rank_loss_op.h b/paddle/fluid/operators/rank_loss_op.h
index 8609958476..3373c846ce 100644
--- a/paddle/fluid/operators/rank_loss_op.h
+++ b/paddle/fluid/operators/rank_loss_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -36,8 +37,8 @@ class RankLossKernel : public framework::OpKernel<T> {
     auto right = framework::EigenVector<T>::Flatten(*right_t);
 
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    out.device(dev) =
-        (1.0f + (left - right).exp()).log() - label * (left - right);
+    EigenRankLoss<std::decay_t<decltype(dev)>, T>::Eval(dev, out, label, left,
+                                                        right);
   }
 };
 
@@ -65,15 +66,15 @@ class RankLossGradKernel : public framework::OpKernel<T> {
     if (d_left_t) {
       d_left_t->mutable_data<T>(ctx.GetPlace());
       auto d_left = framework::EigenVector<T>::Flatten(*d_left_t);
-      d_left.device(dev) =
-          d_out * (1.0f / (1.0f + (right - left).exp()) - label);
+      EigenRankLossGrad<std::decay_t<decltype(dev)>, T>::EvalLeft(
+          dev, d_left, d_out, label, left, right);
     }
     // compute d_right
     if (d_right_t) {
       d_right_t->mutable_data<T>(ctx.GetPlace());
       auto d_right = framework::EigenVector<T>::Flatten(*d_right_t);
-      d_right.device(dev) =
-          -d_out * (1.0f / (1.0f + (right - left).exp()) - label);
+      EigenRankLossGrad<std::decay_t<decltype(dev)>, T>::EvalRight(
+          dev, d_right, d_out, label, left, right);
     }
   }
 };
diff --git a/paddle/fluid/operators/reverse_op.cc b/paddle/fluid/operators/reverse_op.cc
index 8b2b9f464b..98a1610be6 100644
--- a/paddle/fluid/operators/reverse_op.cc
+++ b/paddle/fluid/operators/reverse_op.cc
@@ -145,4 +145,12 @@ REGISTER_OP_CPU_KERNEL(
     ops::ReverseKernel<paddle::platform::CPUDeviceContext, int64_t>,
     ops::ReverseKernel<paddle::platform::CPUDeviceContext, bool>,
     ops::ReverseKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ReverseKernel<paddle::platform::CPUDeviceContext, double>)
+    ops::ReverseKernel<paddle::platform::CPUDeviceContext, double>);
+
+REGISTER_OP_CUDA_KERNEL(
+    reverse, ops::ReverseKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, uint8_t>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, bool>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ReverseKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/reverse_op.cu b/paddle/fluid/operators/reverse_op.cu
deleted file mode 100644
index 635c41529b..0000000000
--- a/paddle/fluid/operators/reverse_op.cu
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/operators/reverse_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    reverse, ops::ReverseKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::ReverseKernel<paddle::platform::CUDADeviceContext, uint8_t>,
-    ops::ReverseKernel<paddle::platform::CUDADeviceContext, int64_t>,
-    ops::ReverseKernel<paddle::platform::CUDADeviceContext, bool>,
-    ops::ReverseKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::ReverseKernel<paddle::platform::CUDADeviceContext, double>)
diff --git a/paddle/fluid/operators/reverse_op.h b/paddle/fluid/operators/reverse_op.h
index 2813f7a486..bf91e2f57a 100644
--- a/paddle/fluid/operators/reverse_op.h
+++ b/paddle/fluid/operators/reverse_op.h
@@ -16,6 +16,7 @@
 #include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -23,7 +24,7 @@ template <typename DeviceContext, typename T, int Rank>
 struct ReverseFunctor {
   void operator()(const DeviceContext& context, const framework::LoDTensor& in,
                   framework::LoDTensor* out, const std::vector<int>& axis) {
-    Eigen::array<bool, Rank> reverse_axis;
+    Eigen::DSizes<bool, Rank> reverse_axis;
     for (int i = 0; i < Rank; ++i) {
       reverse_axis[i] = false;
     }
@@ -37,9 +38,10 @@ struct ReverseFunctor {
 
     auto in_eigen = framework::EigenTensor<T, Rank>::From(in);
     auto out_eigen = framework::EigenTensor<T, Rank>::From(*out);
-    auto* dev = context.eigen_device();
+    auto& dev = *context.eigen_device();
 
-    out_eigen.device(*dev) = in_eigen.reverse(reverse_axis);
+    EigenReverse<std::decay_t<decltype(dev)>, T, Rank>::Eval(
+        dev, out_eigen, in_eigen, reverse_axis);
   }
 };
 
diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc
index a71f49585b..a195452791 100644
--- a/paddle/fluid/operators/scale_op.cc
+++ b/paddle/fluid/operators/scale_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/scale_op.h"
 #include <string>
+#include "paddle/fluid/platform/float16.h"
 
 namespace paddle {
 namespace framework {
@@ -155,3 +156,15 @@ REGISTER_OP_CPU_KERNEL(
     ops::ScaleKernel<paddle::platform::CPUDeviceContext, int16_t>,
     ops::ScaleKernel<paddle::platform::CPUDeviceContext, int>,
     ops::ScaleKernel<paddle::platform::CPUDeviceContext, int64_t>);
+
+// GPU (CUDADeviceContext) kernels for the scale op.
+REGISTER_OP_CUDA_KERNEL(
+    scale, ops::ScaleKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::ScaleKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::ScaleKernel<paddle::platform::CUDADeviceContext, uint8_t>,
+    ops::ScaleKernel<paddle::platform::CUDADeviceContext, int8_t>,
+    ops::ScaleKernel<paddle::platform::CUDADeviceContext, int16_t>,
+    ops::ScaleKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::ScaleKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::ScaleKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::float16>);
diff --git a/paddle/fluid/operators/scale_op.cu b/paddle/fluid/operators/scale_op.cu
deleted file mode 100644
index e1f20a73b2..0000000000
--- a/paddle/fluid/operators/scale_op.cu
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/scale_op.h"
-#include "paddle/fluid/platform/float16.h"
-namespace plat = paddle::platform;
-
-REGISTER_OP_CUDA_KERNEL(
-    scale,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, float>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, double>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   uint8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int8_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int16_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext, int>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   int64_t>,
-    paddle::operators::ScaleKernel<paddle::platform::CUDADeviceContext,
-                                   plat::float16>);
diff --git a/paddle/fluid/operators/scale_op.h b/paddle/fluid/operators/scale_op.h
index 11c81d23b2..544f0a9166 100644
--- a/paddle/fluid/operators/scale_op.h
+++ b/paddle/fluid/operators/scale_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -68,11 +69,8 @@ class ScaleKernel : public framework::OpKernel<T> {
     auto eigen_out = framework::EigenVector<T>::Flatten(*out);
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
-    if (bias_after_scale) {
-      eigen_out.device(dev) = scale * eigen_in + bias;
-    } else {
-      eigen_out.device(dev) = scale * (eigen_in + bias);
-    }
+    EigenScale<std::decay_t<decltype(dev)>, T>::Eval(
+        dev, eigen_out, eigen_in, scale, bias, bias_after_scale);
   }
 };
 
diff --git a/paddle/fluid/operators/sign_op.cc b/paddle/fluid/operators/sign_op.cc
index 3485b4e5c2..6207c33f9d 100644
--- a/paddle/fluid/operators/sign_op.cc
+++ b/paddle/fluid/operators/sign_op.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/sign_op.h"
 #include <memory>
+#include "paddle/fluid/platform/float16.h"
 
 namespace paddle {
 namespace operators {
@@ -69,3 +70,10 @@ REGISTER_OPERATOR(sign, ops::SignOp, ops::SignOpMaker<float>,
 REGISTER_OP_CPU_KERNEL(
     sign, ops::SignKernel<paddle::platform::CPUDeviceContext, float>,
     ops::SignKernel<paddle::platform::CPUDeviceContext, double>);
+
+// GPU (CUDADeviceContext) kernels for the sign op.
+REGISTER_OP_CUDA_KERNEL(
+    sign, ops::SignKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SignKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::SignKernel<paddle::platform::CUDADeviceContext,
+                    paddle::platform::float16>);
diff --git a/paddle/fluid/operators/sign_op.cu b/paddle/fluid/operators/sign_op.cu
deleted file mode 100644
index 817e0fbbd5..0000000000
--- a/paddle/fluid/operators/sign_op.cu
+++ /dev/null
@@ -1,23 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/sign_op.h"
-#include "paddle/fluid/platform/float16.h"
-
-REGISTER_OP_CUDA_KERNEL(
-    sign,
-    paddle::operators::SignKernel<paddle::platform::CUDADeviceContext, float>,
-    paddle::operators::SignKernel<paddle::platform::CUDADeviceContext, double>,
-    paddle::operators::SignKernel<paddle::platform::CUDADeviceContext,
-                                  paddle::platform::float16>);
diff --git a/paddle/fluid/operators/sign_op.h b/paddle/fluid/operators/sign_op.h
index b99934daee..b6d501afa6 100644
--- a/paddle/fluid/operators/sign_op.h
+++ b/paddle/fluid/operators/sign_op.h
@@ -16,6 +16,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 
 namespace paddle {
 namespace operators {
@@ -31,7 +32,8 @@ class SignKernel : public framework::OpKernel<T> {
     auto eigen_in = framework::EigenVector<T>::Flatten(*in);
     auto& place =
         *context.template device_context<DeviceContext>().eigen_device();
-    eigen_out.device(place) = eigen_in.sign();
+    EigenSign<std::decay_t<decltype(place)>, T>::Eval(place, eigen_out,
+                                                      eigen_in);
   }
 };
 
diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc
index 0a41424cfa..c37fd679be 100644
--- a/paddle/fluid/operators/slice_op.cc
+++ b/paddle/fluid/operators/slice_op.cc
@@ -449,3 +449,28 @@ REGISTER_OP_CPU_KERNEL(
                          paddle::platform::complex64>,
     ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
                          paddle::platform::complex128>);
+
+REGISTER_OP_CUDA_KERNEL(
+    slice, ops::SliceKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::float16>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::complex64>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::complex128>);
+
+REGISTER_OP_CUDA_KERNEL(
+    slice_grad,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, float>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, double>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
+                         paddle::platform::float16>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
+                         paddle::platform::complex64>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
+                         paddle::platform::complex128>);
diff --git a/paddle/fluid/operators/slice_op.cu b/paddle/fluid/operators/slice_op.cu
deleted file mode 100644
index 5f80d3cc97..0000000000
--- a/paddle/fluid/operators/slice_op.cu
+++ /dev/null
@@ -1,39 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/slice_op.h"
-#include "paddle/fluid/platform/float16.h"
-
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-
-REGISTER_OP_CUDA_KERNEL(
-    slice, ops::SliceKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::SliceKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::SliceKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::SliceKernel<paddle::platform::CUDADeviceContext, int64_t>,
-    ops::SliceKernel<paddle::platform::CUDADeviceContext, plat::float16>,
-    ops::SliceKernel<paddle::platform::CUDADeviceContext, plat::complex64>,
-    ops::SliceKernel<paddle::platform::CUDADeviceContext, plat::complex128>);
-
-REGISTER_OP_CUDA_KERNEL(
-    slice_grad,
-    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, double>,
-    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int>,
-    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, int64_t>,
-    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, plat::float16>,
-    ops::SliceGradKernel<paddle::platform::CUDADeviceContext, plat::complex64>,
-    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
-                         plat::complex128>);
diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h
index 22f6fa9e3e..3d294ae238 100644
--- a/paddle/fluid/operators/slice_op.h
+++ b/paddle/fluid/operators/slice_op.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include <utility>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/utils.h"
 
@@ -238,8 +239,8 @@ class SliceKernel : public framework::OpKernel<T> {
     out->mutable_data<T>(context.GetPlace());
 
     auto new_out_dims = out->dims();
-    auto offsets = Eigen::array<int64_t, D>();
-    auto extents = Eigen::array<int64_t, D>();
+    auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+    auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
     for (size_t i = 0; i < D; ++i) {
       offsets[i] = 0;
       extents[i] = new_out_dims[i];
@@ -268,10 +269,12 @@ class SliceKernel : public framework::OpKernel<T> {
         offsets_32bit[i] = offsets[i];
         extents_32bit[i] = extents[i];
       }
-      framework::To32BitIndex(out_t).device(place) =
-          framework::To32BitIndex(in_t).slice(offsets_32bit, extents_32bit);
+      EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(
+          place, framework::To32BitIndex(out_t), framework::To32BitIndex(in_t),
+          offsets_32bit, extents_32bit);
     } else {
-      out_t.device(place) = in_t.slice(offsets, extents);
+      EigenSlice<std::decay_t<decltype(place)>, T, D>::Eval(place, out_t, in_t,
+                                                            offsets, extents);
     }
 
     out->Resize(out_dims);
@@ -624,10 +627,12 @@ class SliceGradKernel : public framework::OpKernel<T> {
         paddings_32bit[i] =
             std::make_pair(paddings[i].first, paddings[i].second);
       }
-      framework::To32BitIndex(d_in_t).device(place) =
-          framework::To32BitIndex(d_out_t).pad(paddings_32bit, T(0));
+      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+          place, framework::To32BitIndex(d_in_t),
+          framework::To32BitIndex(d_out_t), paddings_32bit, static_cast<T>(0));
     } else {
-      d_in_t.device(place) = d_out_t.pad(paddings, T(0));
+      EigenPad<std::decay_t<decltype(place)>, T, D>::Eval(
+          place, d_in_t, d_out_t, paddings, static_cast<T>(0));
     }
   }
 };
diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h
index 586cbda7cc..68e6e049cd 100644
--- a/paddle/fluid/pybind/tensor_py.h
+++ b/paddle/fluid/pybind/tensor_py.h
@@ -24,6 +24,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/memory/memcpy.h"
+#include "paddle/fluid/operators/eigen/eigen_function.h"
 #include "paddle/fluid/operators/math/concat_and_split.h"
 #include "paddle/fluid/operators/strided_memcpy.h"
 #include "paddle/fluid/platform/bfloat16.h"
@@ -402,8 +403,8 @@ void _sliceCompute(const framework::Tensor *in, framework::Tensor *out,
   auto out_dims = out->dims();
   auto in_dims = in->dims();
 
-  auto offsets = Eigen::array<int, D>();
-  auto extents = Eigen::array<int, D>();
+  auto offsets = Eigen::DSizes<Eigen::DenseIndex, D>();
+  auto extents = Eigen::DSizes<Eigen::DenseIndex, D>();
   for (size_t i = 0; i < D; ++i) {
     offsets[i] = 0;
     extents[i] = out_dims[i];
@@ -423,7 +424,8 @@ void _sliceCompute(const framework::Tensor *in, framework::Tensor *out,
   auto out_t =
       framework::EigenTensor<T, D, Eigen::RowMajor, Eigen::DenseIndex>::From(
           *out);
-  out_t.device(eigen_place) = in_t.slice(offsets, extents);
+  operators::EigenSlice<std::decay_t<decltype(eigen_place)>, T, D>::Eval(
+      eigen_place, out_t, in_t, offsets, extents);
 }
 
 template <typename T>
-- 
GitLab