diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc
index 87fda12ea631b9d9e40962aea2b744983566b733..14985a3f74aa234d22a774dee3c9b46c75d24d8f 100644
--- a/paddle/fluid/operators/attention_lstm_op.cc
+++ b/paddle/fluid/operators/attention_lstm_op.cc
@@ -15,9 +15,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/attention_lstm_op.h"
 #include <string>
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/operators/math/fc_compute.h"
-// #include "paddle/fluid/operators/math/detail/activation_functions.h"
-// #include "paddle/fluid/operators/math/cpu_vec.h"
+#include "paddle/fluid/platform/cpu_info.h"
 
 namespace paddle {
 namespace operators {
@@ -230,13 +230,6 @@ use lstm_x_t as input and compute as standard LSTM.
 )DOC");
 }
 
-template <typename T>
-inline void vec_relu(const int n, const T* x, T* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = x[i] > 0 ? x[i] : 0;
-  }
-}
-
 // y[i] = (x[i] + bias[0]) > 0 ? (x[i] + bias[0]) : 0;
 template <typename T>
 inline void bias_relu(const int n, const T* x, const T* bias, T* y) {
@@ -244,9 +237,9 @@ inline void bias_relu(const int n, const T* x, const T* bias, T* y) {
     for (int i = 0; i < n; ++i) {
       y[i] = x[i] + bias[0];
     }
-    vec_relu(n, y, y);
+    math::vec_relu<T>(n, y, y);
   } else {
-    vec_relu(n, x, y);
+    math::vec_relu<T>(n, x, y);
   }
 }
@@ -277,37 +270,6 @@ inline void vec_softmax(const math::BlasT<DeviceContext, T>& blas, const int n,
   blas.SCAL(n, static_cast<T>(1) / scalar, y);
 }
 
-#define SIGMOID_THRESHOLD_MIN -40.0
-#define SIGMOID_THRESHOLD_MAX 13.0
-#define EXP_MAX_INPUT 40.0
-
-template <typename T>
-inline T sigmoid(T x) {
-  return 1. / (1. + exp(-x));
-}
-
-template <typename T>
-inline T tanh(T x) {
-  return 2. * sigmoid(2. * x) - 1.;
-}
-
-template <typename T>
-inline void vec_sigmoid(const int n, const T* x, T* y) {
-  const T min = SIGMOID_THRESHOLD_MIN;
-  const T max = SIGMOID_THRESHOLD_MAX;
-  for (int i = 0; i < n; ++i) {
-    T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
-    y[i] = 1.0 / (1.0 + std::exp(-tmp));
-  }
-}
-
-template <typename T>
-inline void vec_tanh(const int n, const T* x, T* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = tanh(x[i]);
-  }
-}
-
 template <typename T>
 class AttentionLSTMKernel : public framework::OpKernel<T> {
  public:
@@ -351,6 +313,10 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
     fc_out->Resize({max_seq_len, 1});
 
     // TODO(TJ): act functor init here
+    // if (platform::jit::MayIUse(platform::jit::avx2)) {
+    // } else if (platform::jit::MayIUse(platform::jit::avx)) {
+    // } else {
+    // }
 
     const T* x_data = x->data<T>();
     const T* h0_data = h0->data<T>();
@@ -418,9 +384,9 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
       blas.VADD(D4, lstm_b_data, lstm_out_data, lstm_out_data);
 
       // gate act: sigmoid
-      vec_sigmoid(D3, lstm_out_data, lstm_out_data);
+      math::vec_sigmoid<T>(D3, lstm_out_data, lstm_out_data);
       // candidate act: tanh
-      vec_tanh(D, lstm_out_data + D3, lstm_out_data + D3);
+      math::vec_tanh<T>(D, lstm_out_data + D3, lstm_out_data + D3);
 
       // a = forget * prev_cell
       blas.VMUL(D, lstm_out_data, prev_cell_data, lstm_out_data);
@@ -432,7 +398,7 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
       blas.VADD(D, lstm_out_data, lstm_out_data + D, cur_cell_out_data);
 
       // state act tanh(cell_out) * output_gate
-      vec_tanh(D, cur_cell_out_data, lstm_out_data);
+      math::vec_tanh<T>(D, cur_cell_out_data, lstm_out_data);
       blas.VMUL(D, lstm_out_data, lstm_out_data + D2, cur_hidden_out_data);
 
       prev_hidden_data = cur_hidden_out_data;
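Note: the commented-out block under "// TODO(TJ): act functor init here" sketches the intended runtime ISA dispatch. A minimal illustration of how that dispatch could look inside the kernel once vectorized implementations exist; this is a hypothetical sketch, not part of the patch, and "act_gate_fn" is an illustrative name rather than an identifier from the diff:

// Hypothetical dispatch sketch: pick a vec_sigmoid instantiation based on
// what the CPU supports at runtime (act_gate_fn is an invented name).
void (*act_gate_fn)(const int, const float*, float*) = nullptr;
if (platform::jit::MayIUse(platform::jit::avx2)) {
  act_gate_fn = math::vec_sigmoid<float, platform::jit::avx2>;
} else if (platform::jit::MayIUse(platform::jit::avx)) {
  act_gate_fn = math::vec_sigmoid<float, platform::jit::avx>;
} else {
  act_gate_fn = math::vec_sigmoid<float>;  // isa template param defaults to isa_any
}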
diff --git a/paddle/fluid/operators/math/cpu_vec.h b/paddle/fluid/operators/math/cpu_vec.h
new file mode 100644
index 0000000000000000000000000000000000000000..29476fce7095a4db9f37b562b4c4bd36538a4077
--- /dev/null
+++ b/paddle/fluid/operators/math/cpu_vec.h
@@ -0,0 +1,81 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/platform/cpu_info.h"
+
+namespace paddle {
+namespace operators {
+namespace math {
+
+#define SIGMOID_THRESHOLD_MIN -40.0
+#define SIGMOID_THRESHOLD_MAX 13.0
+#define EXP_MAX_INPUT 40.0
+
+template <typename T>
+inline T sigmoid(T x) {
+  return 1. / (1. + exp(-x));
+}
+
+template <typename T>
+inline T tanh(T x) {
+  return 2. * sigmoid(2. * x) - 1.;
+}
+
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_sigmoid(const int n, const T* x, T* y) {
+  const T min = SIGMOID_THRESHOLD_MIN;
+  const T max = SIGMOID_THRESHOLD_MAX;
+  for (int i = 0; i < n; ++i) {
+    T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
+    y[i] = 1.0 / (1.0 + std::exp(-tmp));
+  }
+}
+
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_tanh(const int n, const T* x, T* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = tanh<T>(x[i]);
+  }
+}
+
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_relu(const int n, const T* x, T* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = x[i] > 0 ? x[i] : 0;
+  }
+}
+
+template <>
+inline void vec_relu<float, platform::jit::avx>(const int n, const float* x,
+                                                float* y) {
+  // TODO(TJ): complete me
+  for (int i = 0; i < n; ++i) {
+    y[i] = x[i] > 0 ? x[i] : 0;
+  }
+}
+
+template <>
+inline void vec_relu<float, platform::jit::avx2>(const int n, const float* x,
+                                                 float* y) {
+  // TODO(TJ): complete me
+  for (int i = 0; i < n; ++i) {
+    y[i] = x[i] > 0 ? x[i] : 0;
+  }
+}
+
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc
index 7d53a684d6068c79659719159696ef5aebfeaa2b..79a924434ba68bd4607e50f9e3272ad17dec5e74 100644
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -112,6 +112,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
   switch (cpu_isa) {
     case sse42:
       return cpu.has(Cpu::tSSE42);
+    case avx:
+      return cpu.has(Cpu::tAVX);
     case avx2:
       return cpu.has(Cpu::tAVX2);
     case avx512_common:
diff --git a/paddle/fluid/platform/cpu_info.h b/paddle/fluid/platform/cpu_info.h
index f5f67667594f1ab80058533e4c5d5b04c2592b60..2baa21c1bd142c77b3fb84786eec30abe1a4a8df 100644
--- a/paddle/fluid/platform/cpu_info.h
+++ b/paddle/fluid/platform/cpu_info.h
@@ -43,6 +43,7 @@ namespace jit {
 typedef enum {
   isa_any,
   sse42,
+  avx,
   avx2,
   avx512_common,
   avx512_core,
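Usage sketch (illustrative, not part of the patch): the new helpers can be called with just an element type, falling back to the generic isa_any path, or pinned to a specific ISA after a MayIUse check. The main function and buffers below are assumptions made for the example.

#include <vector>
#include "paddle/fluid/operators/math/cpu_vec.h"

int main() {
  namespace jit = paddle::platform::jit;
  namespace math = paddle::operators::math;

  std::vector<float> x = {-1.0f, 0.5f, 2.0f};
  std::vector<float> y(x.size());
  const int n = static_cast<int>(x.size());

  // Generic path: the isa template parameter defaults to isa_any.
  math::vec_relu<float>(n, x.data(), y.data());

  // Explicit AVX specialization, guarded by a runtime CPU-feature check.
  if (jit::MayIUse(jit::avx)) {
    math::vec_relu<float, jit::avx>(n, x.data(), y.data());
  }
  return 0;
}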