add cpu vec

ec59f0d4 · tensor-tang · cf5ea925 · ec59f0d4 · ec59f0d4 · ec59f0d4
4 changed files
--- a/paddle/fluid/operators/attention_lstm_op.cc
+++ b/paddle/fluid/operators/attention_lstm_op.cc
@@ -15,9 +15,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/attention_lstm_op.h"
 #include <string>
 #include "paddle/fluid/operators/math/blas.h"
+#include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/operators/math/fc_compute.h"
-// #include "paddle/fluid/operators/math/detail/activation_functions.h"
+#include "paddle/fluid/platform/cpu_info.h"
-// #include "paddle/fluid/operators/math/cpu_vec.h"
 namespace paddle {
 namespace operators {
@@ -230,13 +230,6 @@ use lstm_x_t as input and compute as standard LSTM.
 )DOC");
 }
-template <typename T>
-inline void vec_relu(const int n, const T* x, T* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = x[i] > 0 ? x[i] : 0;
-  }
-}
 // y[i] = (x[i] + bias[0]) > 0 ? (x[i] + bias[0]) : 0;
 template <typename T>
 inline void bias_relu(const int n, const T* x, const T* bias, T* y) {
@@ -244,9 +237,9 @@ inline void bias_relu(const int n, const T* x, const T* bias, T* y) {
    for (int i = 0; i < n; ++i) {
      y[i] = x[i] + bias[0];
    }
-    vec_relu<T>(n, y, y);
+    math::vec_relu<T>(n, y, y);
  } else {
-    vec_relu<T>(n, x, y);
+    math::vec_relu<T>(n, x, y);
  }
 }
@@ -277,37 +270,6 @@ inline void vec_softmax(const math::BlasT<DeviceContext, T>& blas, const int n,
  blas.SCAL(n, static_cast<T>(1) / scalar, y);
 }
-#define SIGMOID_THRESHOLD_MIN -40.0
-#define SIGMOID_THRESHOLD_MAX 13.0
-#define EXP_MAX_INPUT 40.0
-template <typename T>
-inline T sigmoid(T x) {
-  return 1. / (1. + exp(-x));
-}
-template <typename T>
-inline T tanh(T x) {
-  return 2. * sigmoid(2. * x) - 1.;
-}
-template <typename T>
-inline void vec_sigmoid(const int n, const T* x, T* y) {
-  const T min = SIGMOID_THRESHOLD_MIN;
-  const T max = SIGMOID_THRESHOLD_MAX;
-  for (int i = 0; i < n; ++i) {
-    T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
-    y[i] = 1.0 / (1.0 + std::exp(-tmp));
-  }
-}
-template <typename T>
-inline void vec_tanh(const int n, const T* x, T* y) {
-  for (int i = 0; i < n; ++i) {
-    y[i] = tanh<T>(x[i]);
-  }
-}
 template <typename T>
 class AttentionLSTMKernel : public framework::OpKernel<T> {
 public:
@@ -351,6 +313,10 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
    fc_out->Resize({max_seq_len, 1});
    // TODO(TJ): act functor init here
+    // if (platform::jit::MayIUse(platform::jit::avx2)) {
+    // } else if (platform::jit::MayIUse(platform::jit::avx)) {
+    // } else {
+    // }
    const T* x_data = x->data<T>();
    const T* h0_data = h0->data<T>();
@@ -418,9 +384,9 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
        blas.VADD(D4, lstm_b_data, lstm_out_data, lstm_out_data);
        // gate act: sigmoid
-        vec_sigmoid(D3, lstm_out_data, lstm_out_data);
+        math::vec_sigmoid(D3, lstm_out_data, lstm_out_data);
        // candicate act: tanh
-        vec_tanh(D, lstm_out_data + D3, lstm_out_data + D3);
+        math::vec_tanh(D, lstm_out_data + D3, lstm_out_data + D3);
        // a = forget * prev_cell
        blas.VMUL(D, lstm_out_data, prev_cell_data, lstm_out_data);
@@ -432,7 +398,7 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
        blas.VADD(D, lstm_out_data, lstm_out_data + D, cur_cell_out_data);
        // state act tanh(cell_out) * output_gate
-        vec_tanh(D, cur_cell_out_data, lstm_out_data);
+        math::vec_tanh(D, cur_cell_out_data, lstm_out_data);
        blas.VMUL(D, lstm_out_data, lstm_out_data + D2, cur_hidden_out_data);
        prev_hidden_data = cur_hidden_out_data;

--- a/paddle/fluid/operators/math/cpu_vec.h
+++ b/paddle/fluid/operators/math/cpu_vec.h
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <cmath>
+#include "paddle/fluid/platform/cpu_info.h"
+namespace paddle {
+namespace operators {
+namespace math {
+#define SIGMOID_THRESHOLD_MIN -40.0  // lower clamp for vec_sigmoid input
+#define SIGMOID_THRESHOLD_MAX 13.0  // upper clamp for vec_sigmoid input
+#define EXP_MAX_INPUT 40.0  // not referenced in the visible code; TODO confirm
+// Scalar logistic function 1 / (1 + e^-x).
+// The input is used as-is (no clamping); vec_sigmoid clamps its inputs
+// before exponentiating, this helper does not.
+template <typename T>
+inline T sigmoid(T x) {
+  // Qualified std::exp: an unqualified exp() on an arithmetic T is looked up
+  // only at the point of definition, so it silently depended on some other
+  // header having declared ::exp first. This also matches vec_sigmoid.
+  return 1. / (1. + std::exp(-x));
+}
+// Scalar hyperbolic tangent written in terms of the logistic function:
+// tanh(x) = 2 * sigmoid(2x) - 1.
+template <typename T>
+inline T tanh(T x) {
+  // `2. * x` is at least double, so for float T the sigmoid call runs in
+  // double and the result is narrowed to T on return.
+  const auto logistic = sigmoid(2. * x);
+  return 2. * logistic - 1.;
+}
+// Element-wise sigmoid over n values: y[i] = 1 / (1 + exp(-clamp(x[i]))).
+// Each input is clamped to [SIGMOID_THRESHOLD_MIN, SIGMOID_THRESHOLD_MAX]
+// before exponentiation. Each x[i] is read before y[i] is written.
+// The `isa` parameter is not used by this generic version.
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_sigmoid(const int n, const T* x, T* y) {
+  const T lower = SIGMOID_THRESHOLD_MIN;
+  const T upper = SIGMOID_THRESHOLD_MAX;
+  for (int idx = 0; idx < n; ++idx) {
+    T clamped = x[idx];
+    if (clamped < lower) {
+      clamped = lower;
+    } else if (clamped > upper) {
+      clamped = upper;
+    }
+    y[idx] = 1.0 / (1.0 + std::exp(-clamped));
+  }
+}
+// Element-wise tanh over n values, using the scalar tanh<T> helper defined
+// in this namespace. Each x[i] is read before y[i] is written.
+// The `isa` parameter is not used by this generic version.
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_tanh(const int n, const T* x, T* y) {
+  int idx = 0;
+  while (idx < n) {
+    y[idx] = tanh<T>(x[idx]);
+    ++idx;
+  }
+}
+// Element-wise ReLU over n values: y[i] = x[i] when x[i] > 0, otherwise 0.
+// Each x[i] is read before y[i] is written.
+// The `isa` parameter is not used by this generic version.
+template <typename T, platform::jit::cpu_isa_t isa = platform::jit::isa_any>
+inline void vec_relu(const int n, const T* x, T* y) {
+  for (int idx = 0; idx < n; ++idx) {
+    if (x[idx] > 0) {
+      y[idx] = x[idx];
+    } else {
+      y[idx] = 0;
+    }
+  }
+}
+// float / AVX2 specialization of vec_relu. Still a placeholder: it produces
+// the same result as the generic scalar version by forwarding to it.
+template <>
+inline void vec_relu<float, platform::jit::avx2>(const int n, const float* x,
+                                                 float* y) {
+  // TODO(TJ): complete me
+  vec_relu<float, platform::jit::isa_any>(n, x, y);
+}
+// float / AVX specialization of vec_relu. Still a placeholder: it produces
+// the same result as the generic scalar version by forwarding to it.
+template <>
+inline void vec_relu<float, platform::jit::avx>(const int n, const float* x,
+                                                float* y) {
+  // TODO(TJ): complete me
+  vec_relu<float, platform::jit::isa_any>(n, x, y);
+}
+}  // namespace math
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/fluid/platform/cpu_info.cc
+++ b/paddle/fluid/platform/cpu_info.cc
@@ -112,6 +112,8 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
  switch (cpu_isa) {
    case sse42:
      return cpu.has(Cpu::tSSE42);
+    case avx:
+      return cpu.has(Cpu::tAVX);
    case avx2:
      return cpu.has(Cpu::tAVX2);
    case avx512_common:

--- a/paddle/fluid/platform/cpu_info.h
+++ b/paddle/fluid/platform/cpu_info.h
@@ -43,6 +43,7 @@ namespace jit {
 typedef enum {
  isa_any,
  sse42,
+  avx,
  avx2,
  avx512_common,
  avx512_core,