add cpu vec test

2eb46c2b · tensor-tang · 0d46f518 · 2eb46c2b · 2eb46c2b · 2eb46c2b
3 changed file
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -65,3 +65,4 @@ if(WITH_GPU)
    nv_test(selected_rows_functor_gpu_test SRCS selected_rows_functor_test.cu DEPS selected_rows_functor math_function)
 endif()
 cc_test(concat_test SRCS concat_test.cc DEPS concat)
+cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
--- a/paddle/fluid/operators/math/cpu_vec.h
+++ b/paddle/fluid/operators/math/cpu_vec.h
@@ -15,6 +15,13 @@ limitations under the License. */
 #pragma once
 #include <string>
 #include "paddle/fluid/platform/cpu_info.h"
+#ifdef __AVX__
+#include <immintrin.h>
+#endif
+
+#ifdef PADDLE_WITH_MKLML
+#include "paddle/fluid/platform/dynload/mklml.h"
+#endif

 namespace paddle {
 namespace operators {
@@ -22,7 +29,6 @@ namespace math {

 #define SIGMOID_THRESHOLD_MIN -40.0
 #define SIGMOID_THRESHOLD_MAX 13.0
-#define EXP_MAX_INPUT 40.0

 template <typename T>
 inline T sigmoid(T x) {
@@ -46,7 +52,7 @@ inline void vec_sigmoid(const int n, const T* x, T* y) {
  const T max = SIGMOID_THRESHOLD_MAX;
  for (int i = 0; i < n; ++i) {
    T tmp = (x[i] < min) ? min : ((x[i] > max) ? max : x[i]);
-    y[i] = 1.0 / (1.0 + std::exp(-tmp));
+    y[i] = sigmoid<T>(tmp);
  }
 }

@@ -96,7 +102,7 @@ class VecActivations {
    } else if (type == "identity" || type == "") {
      return vec_identity<T, isa>;
    }
-    PADDLE_THROW("Not support type %s.", type);
+    LOG(FATAL) << "Not support type: " << type;
  }
 };


--- a/paddle/fluid/operators/math/cpu_vec_test.cc
+++ b/paddle/fluid/operators/math/cpu_vec_test.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <sys/time.h>
+#include <cmath>
+#include <vector>
+#include "gflags/gflags.h"
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "paddle/fluid/operators/math/cpu_vec.h"
+
+inline double GetCurrentUS() {
+  struct timeval time;
+  gettimeofday(&time, NULL);
+  return 1e+6 * time.tv_sec + time.tv_usec;
+}
+constexpr int repeat = 1000;
+
+template <typename T>
+inline T _sigmoid(T x) {
+  const T min = SIGMOID_THRESHOLD_MIN;
+  const T max = SIGMOID_THRESHOLD_MAX;
+  T tmp = (x < min) ? min : ((x > max) ? max : x);
+  return 1. / (1. + std::exp(-tmp));
+}
+
+template <typename T>
+inline T _tanh(T x) {
+  return 2. * _sigmoid<T>(2. * x) - 1.;
+}
+
+template <typename T>
+void ref_sigmoid(const int n, const T* x, T* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = _sigmoid(x[i]);
+  }
+}
+
+template <typename T>
+void ref_tanh(const int n, const T* x, T* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = _tanh(x[i]);
+  }
+}
+template <typename T>
+void ref_relu(const int n, const T* x, T* y) {
+  for (int i = 0; i < n; ++i) {
+    y[i] = x[i] > 0 ? x[i] : 0;
+  }
+}
+
+template <typename T>
+void RandomVec(const int n, T* a) {
+  static unsigned int seed = 100;
+  std::mt19937 rng(seed++);
+  std::uniform_real_distribution<double> uniform_dist(0, 1);
+  const T lower = static_cast<T>(-20.f);
+  const T upper = static_cast<T>(-20.f);
+  for (int i = 0; i < n; ++i) {
+    a[i] = static_cast<T>(uniform_dist(rng) * (upper - lower) + lower);
+  }
+}
+
+template <typename T>
+void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
+                  std::function<void(const int, const T*, T*)> ref) {
+  std::vector<T> x(n);
+  std::vector<T> ytgt(n), yref(n);
+  RandomVec<T>(n, x.data());
+
+  const T* x_data = x.data();
+  T* ytgt_data = ytgt.data();
+  T* yref_data = yref.data();
+  auto st = GetCurrentUS();
+  for (int i = 0; i < repeat; ++i) {
+    tgt(n, x_data, ytgt_data);
+  }
+  auto mt = GetCurrentUS();
+  for (int i = 0; i < repeat; ++i) {
+    ref(n, x_data, yref_data);
+  }
+  auto et = GetCurrentUS();
+
+  VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
+          << " us, tgt takes: " << (mt - st) / repeat;
+  for (int i = 0; i < n; ++i) {
+    EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
+  }
+}
+
+TEST(CpuVecTest, sigmoid) {
+  namespace jit = paddle::platform::jit;
+  using namespace paddle::operators::math;  // NOLINT
+  for (auto sz : {1, 2, 15, 16, 32, 128, 200, 512}) {
+    TestAndBench<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);
+    TestAndBench<float>(sz, vec_sigmoid<float, jit::avx>, ref_sigmoid<float>);
+    TestAndBench<float>(sz, vec_sigmoid<float, jit::avx2>, ref_sigmoid<float>);
+    TestAndBench<float>(sz, vec_sigmoid<float, jit::avx512_common>,
+                        ref_sigmoid<float>);
+  }
+  TestAndBench<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);
+}
+
+TEST(CpuVecTest, tanh) {
+  namespace jit = paddle::platform::jit;
+  using namespace paddle::operators::math;  // NOLINT
+  for (auto sz : {1, 2, 15, 16, 32, 128, 200, 512}) {
+    TestAndBench<float>(sz, vec_tanh<float>, ref_tanh<float>);
+    TestAndBench<float>(sz, vec_tanh<float, jit::avx>, ref_tanh<float>);
+    TestAndBench<float>(sz, vec_tanh<float, jit::avx2>, ref_tanh<float>);
+    TestAndBench<float>(sz, vec_tanh<float, jit::avx512_common>,
+                        ref_tanh<float>);
+  }
+  TestAndBench<double>(30, vec_tanh<double>, ref_tanh<double>);
+}
+
+TEST(CpuVecTest, relu) {
+  namespace jit = paddle::platform::jit;
+  using namespace paddle::operators::math;  // NOLINT
+  for (auto sz : {1, 2, 15, 16, 32, 128, 200, 512}) {
+    TestAndBench<float>(sz, vec_relu<float>, ref_relu<float>);
+    TestAndBench<float>(sz, vec_relu<float, jit::avx>, ref_relu<float>);
+    TestAndBench<float>(sz, vec_relu<float, jit::avx2>, ref_relu<float>);
+    TestAndBench<float>(sz, vec_relu<float, jit::avx512_common>,
+                        ref_relu<float>);
+  }
+  TestAndBench<double>(30, vec_relu<double>, ref_relu<double>);
+}