Unverified commit 70540b26, authored by Feiyu Chan, committed by GitHub

[phi] move cpu_vec (#39714)

Move cpu_vec.h to phi/kernels/funcs and update its call sites (fused operator kernels, the unit test, and CMake) accordingly.
Parent 880dec0f
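For downstream code, the move amounts to an include-path and namespace change: paddle/fluid/operators/math/cpu_vec.h becomes paddle/phi/kernels/funcs/cpu_vec.h, and calls through paddle::operators::math:: become phi::funcs::. A minimal call-site sketch under those assumptions — the relu_inplace wrapper is hypothetical, made up for illustration; the vec_relu signature and the MayIUse dispatch pattern are taken from the diff below:

// Before: #include "paddle/fluid/operators/math/cpu_vec.h"
//         math::vec_relu<float, platform::avx>(n, x, y);
#include "paddle/fluid/platform/cpu_info.h"    // platform::MayIUse, ISA tags
#include "paddle/phi/kernels/funcs/cpu_vec.h"  // new home of the vec_* kernels

namespace platform = paddle::platform;

// Hypothetical wrapper: dispatch to the AVX path when available,
// mirroring the pattern used by the operators touched in this commit.
void relu_inplace(const int n, const float* x, float* y) {
  if (platform::MayIUse(platform::avx)) {
    phi::funcs::vec_relu<float, platform::avx>(n, x, y);
  } else {
    phi::funcs::vec_relu<float, platform::isa_any>(n, x, y);
  }
}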
@@ -14,10 +14,10 @@ limitations under the License. */
 #include "paddle/fluid/operators/attention_lstm_op.h"
 
 #include <string>
-#include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/operators/math/fc.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/cpu_vec.h"
 
 namespace paddle {
 namespace operators {
@@ -269,10 +269,10 @@ use lstm_x_t as input and compute as standard LSTM.
 template <typename T>
 inline void bias_relu(const int n, const T* x, const T* bias, T* y) {
   if (bias) {
-    math::vec_add_bias<T, platform::avx>(n, *bias, x, y);
-    math::vec_relu<T, platform::avx>(n, y, y);
+    phi::funcs::vec_add_bias<T, platform::avx>(n, *bias, x, y);
+    phi::funcs::vec_relu<T, platform::avx>(n, y, y);
   } else {
-    math::vec_relu<T, platform::avx>(n, x, y);
+    phi::funcs::vec_relu<T, platform::avx>(n, x, y);
   }
 }
 
@@ -283,14 +283,14 @@ inline void vec_softmax(const int n, const T* x, T* y) {
   for (int i = 1; i < n; ++i) {
     scalar = scalar < x[i] ? x[i] : scalar;
   }
-  math::vec_add_bias<T, platform::avx>(n, -scalar, x, y);  // sub
-  math::vec_exp<T>(n, y, y);                               // exp
+  phi::funcs::vec_add_bias<T, platform::avx>(n, -scalar, x, y);  // sub
+  phi::funcs::vec_exp<T>(n, y, y);                               // exp
   // sum
   scalar = T(0);
   for (int i = 0; i < n; ++i) {
     scalar += y[i];
   }
-  math::vec_scal<T>(n, static_cast<T>(1) / scalar, y);  // scale
+  phi::funcs::vec_scal<T>(n, static_cast<T>(1) / scalar, y);  // scale
 }
 
 template <typename T>
@@ -344,12 +344,12 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
     auto& act_cell_str = ctx.Attr<std::string>("cell_activation");
     auto& act_cand_str = ctx.Attr<std::string>("candidate_activation");
     if (platform::MayIUse(platform::avx)) {
-      math::VecActivations<T, platform::avx> act_functor;
+      phi::funcs::VecActivations<T, platform::avx> act_functor;
       act_gate = act_functor(act_gate_str);
       act_cell = act_functor(act_cell_str);
       act_cand = act_functor(act_cand_str);
     } else {
-      math::VecActivations<T, platform::isa_any> act_functor;
+      phi::funcs::VecActivations<T, platform::isa_any> act_functor;
       act_gate = act_functor(act_gate_str);
       act_cell = act_functor(act_cell_str);
       act_cand = act_functor(act_cand_str);
@@ -14,9 +14,9 @@ limitations under the License. */
 #include "paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h"
 
 #include <string>
-#include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/cpu_vec.h"
 #include "paddle/phi/kernels/funcs/sequence2batch.h"
 
 namespace paddle {
@@ -243,12 +243,12 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
   auto& act_cell_str = ctx.Attr<std::string>("cell_activation");      \
   auto& act_cand_str = ctx.Attr<std::string>("candidate_activation"); \
   if (platform::MayIUse(platform::avx)) {                             \
-    math::VecActivations<T, platform::avx> act_functor;               \
+    phi::funcs::VecActivations<T, platform::avx> act_functor;         \
     act_gate = act_functor(act_gate_str);                             \
    act_cell = act_functor(act_cell_str);                              \
     act_cand = act_functor(act_cand_str);                             \
   } else {                                                            \
-    math::VecActivations<T, platform::isa_any> act_functor;           \
+    phi::funcs::VecActivations<T, platform::isa_any> act_functor;     \
     act_gate = act_functor(act_gate_str);                             \
     act_cell = act_functor(act_cell_str);                             \
     act_cand = act_functor(act_cand_str);                             \
@@ -14,10 +14,10 @@ limitations under the License. */
 #include "paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h"
 
 #include <string>
-#include "paddle/fluid/operators/math/cpu_vec.h"
 #include "paddle/fluid/operators/math/fc.h"
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
+#include "paddle/phi/kernels/funcs/cpu_vec.h"
 
 namespace paddle {
 namespace operators {
@@ -196,10 +196,10 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
     std::function<void(const int, const T*, T*)> fc_act;
     auto& fc_act_str = ctx.Attr<std::string>("fc_activation");
     if (platform::MayIUse(platform::avx)) {
-      math::VecActivations<T, platform::avx> act_functor;
+      phi::funcs::VecActivations<T, platform::avx> act_functor;
       fc_act = act_functor(fc_act_str);
     } else {
-      math::VecActivations<T, platform::isa_any> act_functor;
+      phi::funcs::VecActivations<T, platform::isa_any> act_functor;
       fc_act = act_functor(fc_act_str);
     }
@@ -70,7 +70,6 @@ if(WITH_GPU AND (NOT WITH_ROCM))
   endif()
 endif()
 
-cc_test(cpu_vec_test SRCS cpu_vec_test.cc DEPS blas cpu_info)
 if(WITH_TESTING AND TEST im2col_test)
   set_tests_properties(im2col_test PROPERTIES TIMEOUT 120)
 endif()
This diff is collapsed.
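The collapsed diff is presumably the moved header itself (paddle/fluid/operators/math/cpu_vec.h relocated to paddle/phi/kernels/funcs/cpu_vec.h, per the commit message). From the call sites in this commit, the interface now exported from phi::funcs has roughly this shape — a declaration sketch inferred from usage, not the actual header; the cpu_isa_t template parameter is assumed to come from paddle::platform's cpu_info.h:

#include <functional>
#include <string>
#include "paddle/fluid/platform/cpu_info.h"  // assumed source of cpu_isa_t, isa_any

namespace phi {
namespace funcs {

// Element-wise kernels, templated on data type and ISA level
// (isa_any, avx, avx2, avx512f); signatures inferred from the tests below.
template <typename T,
          paddle::platform::cpu_isa_t isa = paddle::platform::isa_any>
void vec_sigmoid(const int n, const T* x, T* y);

template <typename T,
          paddle::platform::cpu_isa_t isa = paddle::platform::isa_any>
void vec_add_bias(const int n, const T a, const T* x, T* y);

// Maps an activation name ("sigmoid", "tanh", "relu", ...) to the matching
// kernel wrapped in a std::function, as used by the fused LSTM kernels above.
template <typename T, paddle::platform::cpu_isa_t isa>
struct VecActivations {
  std::function<void(const int, const T*, T*)> operator()(
      const std::string& type);
};

}  // namespace funcs
}  // namespace phi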
@@ -22,3 +22,5 @@ endif()
 if(WITH_ROCM)
   hip_test(test_math_function_gpu SRCS test_math_function.cu DEPS math_function)
 endif()
+
+cc_test(test_cpu_vec SRCS test_cpu_vec.cc DEPS blas cpu_info)
@@ -18,7 +18,10 @@ limitations under the License. */
 
 #include "glog/logging.h"
 #include "gtest/gtest.h"
-#include "paddle/fluid/operators/math/cpu_vec.h"
+#include "paddle/phi/kernels/funcs/cpu_vec.h"
+
+namespace phi {
+namespace tests {
 
 inline double GetCurrentUS() {
   struct timeval time;
@@ -62,7 +65,9 @@ void ref_relu(const int n, const T* x, T* y) {
 }
 
 template <typename T>
-void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
+void RandomVec(const int n,
+               T* a,
+               const T lower = static_cast<T>(-20.f),
                const T upper = static_cast<T>(20.f)) {
   static unsigned int seed = 100;
   std::mt19937 rng(seed++);
@@ -73,7 +78,8 @@ void RandomVec(const int n, T* a, const T lower = static_cast<T>(-20.f),
 }
 
 template <typename T>
-void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
+void TestAndBench(const int n,
+                  std::function<void(const int, const T*, T*)> tgt,
                   std::function<void(const int, const T*, T*)> ref) {
   std::vector<T> x(n);
   std::vector<T> ytgt(n), yref(n);
@@ -101,47 +107,48 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
 
 TEST(CpuVecTest, sigmoid) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestAndBench<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);
-    TestAndBench<float>(sz, vec_sigmoid<float, platform::avx>,
-                        ref_sigmoid<float>);
-    TestAndBench<float>(sz, vec_sigmoid<float, platform::avx2>,
-                        ref_sigmoid<float>);
-    TestAndBench<float>(sz, vec_sigmoid<float, platform::avx512f>,
-                        ref_sigmoid<float>);
+    TestAndBench<float>(
+        sz, vec_sigmoid<float, platform::avx>, ref_sigmoid<float>);
+    TestAndBench<float>(
+        sz, vec_sigmoid<float, platform::avx2>, ref_sigmoid<float>);
+    TestAndBench<float>(
+        sz, vec_sigmoid<float, platform::avx512f>, ref_sigmoid<float>);
   }
   TestAndBench<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);
 }
 
 TEST(CpuVecTest, tanh) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestAndBench<float>(sz, vec_tanh<float>, ref_tanh<float>);
     TestAndBench<float>(sz, vec_tanh<float, platform::avx>, ref_tanh<float>);
     TestAndBench<float>(sz, vec_tanh<float, platform::avx2>, ref_tanh<float>);
-    TestAndBench<float>(sz, vec_tanh<float, platform::avx512f>,
-                        ref_tanh<float>);
+    TestAndBench<float>(
+        sz, vec_tanh<float, platform::avx512f>, ref_tanh<float>);
   }
   TestAndBench<double>(30, vec_tanh<double>, ref_tanh<double>);
 }
 
 TEST(CpuVecTest, relu) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestAndBench<float>(sz, vec_relu<float>, ref_relu<float>);
     TestAndBench<float>(sz, vec_relu<float, platform::avx>, ref_relu<float>);
     TestAndBench<float>(sz, vec_relu<float, platform::avx2>, ref_relu<float>);
-    TestAndBench<float>(sz, vec_relu<float, platform::avx512f>,
-                        ref_relu<float>);
+    TestAndBench<float>(
+        sz, vec_relu<float, platform::avx512f>, ref_relu<float>);
   }
   TestAndBench<double>(30, vec_relu<double>, ref_relu<double>);
 }
 
 template <typename T>
-void compare_sum(size_t n, std::function<void(const size_t, const T*, T*)> tgt,
+void compare_sum(size_t n,
                 std::function<void(const size_t, const T*, T*)> tgt,
                 std::function<void(const size_t, const T*, T*)> ref) {
   std::vector<T> x(n);
   T ytgt_data, yref_data;
@@ -155,18 +162,19 @@ void compare_sum(size_t n, std::function<void(const size_t, const T*, T*)> tgt,
 
 TEST(CpuVecTest, vec_sum) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     compare_sum<float>(sz, vec_sum<float>, vec_sum<float, platform::isa_any>);
-    compare_sum<float>(sz, vec_sum<float, platform::avx>,
-                       vec_sum<float, platform::isa_any>);
+    compare_sum<float>(
+        sz, vec_sum<float, platform::avx>, vec_sum<float, platform::isa_any>);
   }
   compare_sum<double>(30U, vec_sum<double>, vec_sum<double, platform::isa_any>);
 }
 
 template <typename T>
 void compare_clip(
-    size_t n, T threshold,
+    size_t n,
+    T threshold,
     std::function<void(const size_t, const T, const T*, T*)> tgt,
     std::function<void(const size_t, const T, const T*, T*)> ref) {
   std::vector<T> x(n);
@@ -185,20 +193,23 @@ void compare_clip(
 
 TEST(CpuVecTest, vec_clip) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
-    compare_clip<float>(sz, -4.f, vec_clip<float>,
-                        vec_clip<float, platform::isa_any>);
-    compare_clip<float>(sz, -1.1f, vec_clip<float, platform::avx>,
+    compare_clip<float>(
+        sz, -4.f, vec_clip<float>, vec_clip<float, platform::isa_any>);
+    compare_clip<float>(sz,
+                        -1.1f,
+                        vec_clip<float, platform::avx>,
                         vec_clip<float, platform::isa_any>);
   }
-  compare_clip<double>(30U, 1.0, vec_clip<double>,
-                       vec_clip<double, platform::isa_any>);
+  compare_clip<double>(
+      30U, 1.0, vec_clip<double>, vec_clip<double, platform::isa_any>);
 }
 
 template <typename T>
 void compare_mul(
-    size_t n, std::function<void(const size_t, const T*, const T*, T*)> tgt,
+    size_t n,
+    std::function<void(const size_t, const T*, const T*, T*)> tgt,
     std::function<void(const size_t, const T*, const T*, T*)> ref) {
   std::vector<T> x(n), y(n);
   std::vector<T> ztgt(n), zref(n);
@@ -220,18 +231,19 @@ void compare_mul(
 
 TEST(CpuVecTest, vec_mul) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     compare_mul<float>(sz, vec_mul<float>, vec_mul<float, platform::isa_any>);
-    compare_mul<float>(sz, vec_mul<float, platform::avx>,
-                       vec_mul<float, platform::isa_any>);
+    compare_mul<float>(
+        sz, vec_mul<float, platform::avx>, vec_mul<float, platform::isa_any>);
   }
   compare_mul<double>(30U, vec_mul<double>, vec_mul<double, platform::isa_any>);
 }
 
 template <typename T>
 void compare_mul_reduce(
-    size_t n, std::function<void(const size_t, const T*, const T*, T*)> tgt,
+    size_t n,
+    std::function<void(const size_t, const T*, const T*, T*)> tgt,
     std::function<void(const size_t, const T*, const T*, T*)> ref) {
   std::vector<T> x(n), y(n);
   T ztgt_data, zref_data;
@@ -249,19 +261,21 @@ void compare_mul_reduce(
 
 TEST(CpuVecTest, vec_mul_reduce) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (size_t sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
-    compare_mul_reduce<float>(sz, vec_mul_reduce<float>,
-                              vec_mul_reduce<float, platform::isa_any>);
-    compare_mul_reduce<float>(sz, vec_mul_reduce<float, platform::avx>,
+    compare_mul_reduce<float>(
+        sz, vec_mul_reduce<float>, vec_mul_reduce<float, platform::isa_any>);
+    compare_mul_reduce<float>(sz,
+                              vec_mul_reduce<float, platform::avx>,
                               vec_mul_reduce<float, platform::isa_any>);
   }
-  compare_mul_reduce<double>(30U, vec_mul_reduce<double>,
-                             vec_mul_reduce<double, platform::isa_any>);
+  compare_mul_reduce<double>(
+      30U, vec_mul_reduce<double>, vec_mul_reduce<double, platform::isa_any>);
 }
 
 template <typename T>
-void TestInplace(const int n, std::function<void(const int, const T*, T*)> tgt,
+void TestInplace(const int n,
+                 std::function<void(const int, const T*, T*)> tgt,
                  std::function<void(const int, const T*, T*)> ref) {
   std::vector<T> x(n);
   std::vector<T> ytgt(n), yref(n);
@@ -283,22 +297,22 @@ void TestInplace(const int n, std::function<void(const int, const T*, T*)> tgt,
 
 TEST(CpuVecTest, inplace_sigmoid) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestInplace<float>(sz, vec_sigmoid<float>, ref_sigmoid<float>);
-    TestInplace<float>(sz, vec_sigmoid<float, platform::avx>,
-                       ref_sigmoid<float>);
-    TestInplace<float>(sz, vec_sigmoid<float, platform::avx2>,
-                       ref_sigmoid<float>);
-    TestInplace<float>(sz, vec_sigmoid<float, platform::avx512f>,
-                       ref_sigmoid<float>);
+    TestInplace<float>(
+        sz, vec_sigmoid<float, platform::avx>, ref_sigmoid<float>);
+    TestInplace<float>(
+        sz, vec_sigmoid<float, platform::avx2>, ref_sigmoid<float>);
+    TestInplace<float>(
+        sz, vec_sigmoid<float, platform::avx512f>, ref_sigmoid<float>);
   }
   TestInplace<double>(30, vec_sigmoid<double>, ref_sigmoid<double>);
 }
 
 TEST(CpuVecTest, inplace_tanh) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestInplace<float>(sz, vec_tanh<float>, ref_tanh<float>);
     TestInplace<float>(sz, vec_tanh<float, platform::avx>, ref_tanh<float>);
@@ -310,7 +324,7 @@ TEST(CpuVecTest, inplace_tanh) {
 
 TEST(CpuVecTest, inplace_relu) {
   namespace platform = paddle::platform;
-  using namespace paddle::operators::math;  // NOLINT
+  using namespace phi::funcs;  // NOLINT
   for (auto sz : {1, 2, 15, 16, 30, 32, 128, 200, 512}) {
     TestInplace<float>(sz, vec_relu<float>, ref_relu<float>);
     TestInplace<float>(sz, vec_relu<float, platform::avx>, ref_relu<float>);
@@ -319,3 +333,5 @@ TEST(CpuVecTest, inplace_relu) {
   }
   TestInplace<double>(30, vec_relu<double>, ref_relu<double>);
 }
+}  // namespace tests
+}  // namespace phi