diff --git a/paddle/fluid/operators/jit/gen/jitcode.h b/paddle/fluid/operators/jit/gen/jitcode.h
index 765952fc35276554fc8fcbf19208f81bedd611c6..64126e3f61a8bd63db9c68d6438386c68dd13859 100644
--- a/paddle/fluid/operators/jit/gen/jitcode.h
+++ b/paddle/fluid/operators/jit/gen/jitcode.h
@@ -56,10 +56,6 @@ typedef enum {
   identity
 } operand_type;
 
-#define XMM_FLOAT_BLOCK 4
-#define YMM_FLOAT_BLOCK 8
-#define ZMM_FLOAT_BLOCK 16
-
 #define DECLARE_JIT_CODE(codename) \
   const char* name() const override { return #codename; }
 
diff --git a/paddle/fluid/operators/jit/helper.h b/paddle/fluid/operators/jit/helper.h
index 3431c22111f948b3d5261a59feb761668300ce24..44952fb90797f8bb51f891c4ea69fcf0b7f1dbe5 100644
--- a/paddle/fluid/operators/jit/helper.h
+++ b/paddle/fluid/operators/jit/helper.h
@@ -27,10 +27,6 @@ namespace paddle {
 namespace operators {
 namespace jit {
 
-#define SIGMOID_THRESHOLD_MIN -40.0
-#define SIGMOID_THRESHOLD_MAX 13.0
-#define EXP_MAX_INPUT 40.0
-
 template <typename T>
 inline typename std::enable_if<
     std::is_same<T, float>::value &&
diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h
index 00d583c60bf73582dab7df75ec8feac1b8f3c3c9..f10d9f3fdd65b280aa59694e6890448e5418cc2d 100644
--- a/paddle/fluid/operators/jit/kernel_base.h
+++ b/paddle/fluid/operators/jit/kernel_base.h
@@ -13,6 +13,7 @@
  * limitations under the License. */
 
 #pragma once
+#include "paddle/fluid/operators/jit/macro.h"
 #include "paddle/fluid/platform/macros.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/jit/macro.h b/paddle/fluid/operators/jit/macro.h
new file mode 100644
index 0000000000000000000000000000000000000000..b2622eba8b70cc553a2da44638d577c9d7751b25
--- /dev/null
+++ b/paddle/fluid/operators/jit/macro.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#pragma once
+#include <type_traits>
+
+namespace paddle {
+namespace operators {
+namespace jit {
+
+#define SIGMOID_THRESHOLD_MIN -40.0
+#define SIGMOID_THRESHOLD_MAX 13.0
+#define EXP_MAX_INPUT 40.0
+
+#define XMM_FLOAT_BLOCK 4
+#define YMM_FLOAT_BLOCK 8
+#define ZMM_FLOAT_BLOCK 16
+
+}  // namespace jit
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
index ffecb732975a652456b154a61feb8a20a727d306..3ecb520392e59d995a68ee8d10022b53d1bb1ddd 100644
--- a/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/more/mkl/CMakeLists.txt
@@ -6,3 +6,6 @@ set(JIT_KERNEL_DEPS ${JIT_KERNEL_DEPS} dynload_mklml jit_kernel_mkl PARENT_SCOPE
 USE_JITKERNEL_MORE(vmul, mkl)
 USE_JITKERNEL_MORE(vadd, mkl)
 USE_JITKERNEL_MORE(vscal, mkl)
+USE_JITKERNEL_MORE(vexp, mkl)
+USE_JITKERNEL_MORE(vsigmoid, mkl)
+USE_JITKERNEL_MORE(vtanh, mkl)
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc
index 3d963cbf1dd5468afc717178f1a53234d8e14a99..42f6df576b1b025ac8c42cc331993f56e9f42e1b 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.cc
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc
@@ -62,6 +62,16 @@ void VScal<double>(const double* a, const double* x, double* y, int n) {
   }
 }
 
+template <>
+void VExp<float>(const float* x, float* y, int n) {
+  platform::dynload::vsExp(n, x, y);
+}
+
+template <>
+void VExp<double>(const double* x, double* y, int n) {
+  platform::dynload::vdExp(n, x, y);
+}
+
 // TODO(TJ): tuning me carefully on AVX, AVX2 and AVX512
 template <>
 bool VMulKernel<float>::UseMe(int d) const {
@@ -78,6 +88,21 @@ bool VScalKernel<float>::UseMe(int d) const {
   return platform::MayIUse(platform::avx512f) && d > 512;
 }
 
+template <>
+bool VExpKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
+template <>
+bool VSigmoidKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
+template <>
+bool VTanhKernel<float>::UseMe(int d) const {
+  return d > 7;
+}
+
 #define AWALYS_USE_ME_WITH_DOUBLE(func)           \
   template <>                                     \
   bool func##Kernel<double>::UseMe(int d) const { \
@@ -87,6 +112,9 @@ bool VScalKernel<float>::UseMe(int d) const {
 AWALYS_USE_ME_WITH_DOUBLE(VMul);
 AWALYS_USE_ME_WITH_DOUBLE(VAdd);
 AWALYS_USE_ME_WITH_DOUBLE(VScal);
+AWALYS_USE_ME_WITH_DOUBLE(VExp);
+AWALYS_USE_ME_WITH_DOUBLE(VSigmoid);
+AWALYS_USE_ME_WITH_DOUBLE(VTanh);
 #undef AWALYS_USE_ME_WITH_DOUBLE
 
 }  // namespace mkl
@@ -104,5 +132,8 @@ namespace mkl = paddle::operators::jit::more::mkl;
 REGISTER_MKL_KERNEL(vmul, VMul);
 REGISTER_MKL_KERNEL(vadd, VAdd);
 REGISTER_MKL_KERNEL(vscal, VScal);
+REGISTER_MKL_KERNEL(vexp, VExp);
+REGISTER_MKL_KERNEL(vsigmoid, VSigmoid);
+REGISTER_MKL_KERNEL(vtanh, VTanh);
 #undef REGISTER_MKL_KERNEL
diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h
index 84a93f408f51e444c62ea3b70fba8daab280fed0..bf209d2f9d23dbd1376b0ea2e2ac007280dded4c 100644
--- a/paddle/fluid/operators/jit/more/mkl/mkl.h
+++ b/paddle/fluid/operators/jit/more/mkl/mkl.h
@@ -32,6 +32,34 @@ void VAdd(const T* x, const T* y, T* z, int n);
 
 template <typename T>
 void VScal(const T* a, const T* x, T* y, int n);
+template <typename T>
+void VExp(const T* x, T* y, int n);
+
+template <typename T>
+void VSigmoid(const T* x, T* y, int n) {
+  const T min = SIGMOID_THRESHOLD_MIN;
+  const T max = SIGMOID_THRESHOLD_MAX;
+  for (int i = 0; i < n; ++i) {
+    y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
+    y[i] = static_cast<T>(0) - y[i];
+  }
+  VExp(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(1) / (static_cast<T>(1) + y[i]);
+  }
+}
+
+template <typename T>
+void VTanh(const T* x, T* y, int n) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * x[i];
+  }
+  VSigmoid(y, y, n);
+  for (int i = 0; i < n; ++i) {
+    y[i] = static_cast<T>(2) * y[i] - static_cast<T>(1);
+  }
+}
+
 #define DECLARE_MKL_KERNEL(name, tuples)              \
   template <typename T>                               \
   class name##Kernel : public KernelImpl<tuples<T>> { \
@@ -47,6 +75,11 @@ DECLARE_MKL_KERNEL(VAdd, XYZNTuples);
 // AXYN
 DECLARE_MKL_KERNEL(VScal, AXYNTuples);
 
+// XYN
+DECLARE_MKL_KERNEL(VExp, XYNTuples);
+DECLARE_MKL_KERNEL(VSigmoid, XYNTuples);
+DECLARE_MKL_KERNEL(VTanh, XYNTuples);
+
 #undef DECLARE_MKL_KERNEL
 
 }  // namespace mkl
diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc
index 62d4cdc19ae05870789ca624a454ba9080d82e3b..e211276d189132b113c0ca531b590b31ac084812 100644
--- a/paddle/fluid/operators/jit/test.cc
+++ b/paddle/fluid/operators/jit/test.cc
@@ -312,7 +312,7 @@ void TestXYNKernel() {
 
     std::vector<T> x(d), yref(d);
     std::vector<T> xinp(d);  // inplace test
-    RandomVec<T>(d, x.data());
+    RandomVec<T>(d, x.data(), -2.f, 2.f);
     std::copy(x.begin(), x.end(), xinp.begin());
 
     const T* x_data = x.data();
diff --git a/paddle/fluid/operators/math/jit_kernel_exp.cc b/paddle/fluid/operators/math/jit_kernel_exp.cc
index 7945cfb253a61b7d1191c39537254126e2bb85dd..1f97ed1e62c506caa8edbb89b3b6c9b6a7032fb3 100644
--- a/paddle/fluid/operators/math/jit_kernel_exp.cc
+++ b/paddle/fluid/operators/math/jit_kernel_exp.cc
@@ -30,47 +30,6 @@ namespace operators {
 namespace math {
 namespace jitkernel {
 
-#ifdef PADDLE_WITH_MKLML
-// try to use MKL to speedup
-template <typename T>
-void VExpMKL(const T* x, T* y, int n);
-
-template <>
-void VExpMKL<float>(const float* x, float* y, int n) {
-  platform::dynload::vsExp(n, x, y);
-}
-
-template <>
-void VExpMKL<double>(const double* x, double* y, int n) {
-  platform::dynload::vdExp(n, x, y);
-}
-
-template <typename T>
-void VSigmoidMKL(const T* x, T* y, int n) {
-  const T min = SIGMOID_THRESHOLD_MIN;
-  const T max = SIGMOID_THRESHOLD_MAX;
-  for (int i = 0; i < n; ++i) {
-    y[i] = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
-    y[i] = static_cast<T>(0) - y[i];
-  }
-  VExpMKL(y, y, n);
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(1) / (static_cast<T>(1) + y[i]);
-  }
-}
-
-template <typename T>
-void VTanhMKL(const T* x, T* y, int n) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(2) * x[i];
-  }
-  VSigmoidMKL(y, y, n);
-  for (int i = 0; i < n; ++i) {
-    y[i] = static_cast<T>(2) * y[i] - static_cast<T>(1);
-  }
-}
-#endif
-
 /* VExp JitKernel */
 template <typename T>
 class VExpKernelImpl : public VExpKernel<T> {