add blas vexp

3dd66390 · tensor-tang · 0ec1f65c · 3dd66390 · 3dd66390 · 3dd66390
3 changed files
--- a/paddle/fluid/operators/math/blas.h
+++ b/paddle/fluid/operators/math/blas.h
@@ -149,6 +149,9 @@ class Blas {
  template <typename T>
  void VCOPY(int n, const T* x, T* y) const;
+  template <typename T>
+  void VEXP(int n, const T* x, T* y) const;
  template <typename T>
  void GEMV(bool trans_a, int M, int N, T alpha, const T* A, const T* B, T beta,
            T* C) const;

--- a/paddle/fluid/operators/math/blas_impl.h
+++ b/paddle/fluid/operators/math/blas_impl.h
@@ -97,6 +97,11 @@ struct CBlas<float> {
  static void VMUL(ARGS... args) {
    platform::dynload::vsMul(args...);
  }
+  template <typename... ARGS>
+  static void VEXP(ARGS... args) {
+    platform::dynload::vsExp(args...);  // forwards every argument unchanged
+  }
 };
 template <>
@@ -172,6 +177,11 @@ struct CBlas<double> {
  static void VMUL(ARGS... args) {
    platform::dynload::vdMul(args...);
  }
+  template <typename... ARGS>
+  static void VEXP(ARGS... args) {
+    platform::dynload::vdExp(args...);  // forwards every argument unchanged
+  }
 };
 #else
@@ -230,6 +240,7 @@ struct CBlas<platform::float16> {
    PADDLE_THROW("float16 SMM_GEMM not supported on CPU");
  }
  static void VMUL(...) { PADDLE_THROW("float16 VMUL not supported on CPU"); }
+  static void VEXP(...) { PADDLE_THROW("float16 VEXP not supported on CPU"); }
  static void DOT(...) { PADDLE_THROW("float16 DOT not supported on CPU"); };
  static void SCAL(...) { PADDLE_THROW("float16 SCAL not supported on CPU"); };
 #ifdef PADDLE_WITH_MKLML
@@ -374,6 +385,19 @@ void Blas<platform::CPUDeviceContext>::VMUL(int n, const T *x, const T *y,
 #endif
 }
+template <>
+template <typename T>
+void Blas<platform::CPUDeviceContext>::VEXP(int n, const T *x, T *y) const {
+#ifdef PADDLE_WITH_MKLML
+  CBlas<T>::VEXP(n, x, y);  // MKLML build: dispatch to the CBlas<T> wrapper
+#else
+  // TODO: check whether OpenBLAS offers a vexp; until then, std::exp per element.
+  for (int i = 0; i < n; ++i) {
+    y[i] = std::exp(x[i]);
+  }
+#endif
+}
 template <>
 template <typename T>
 T Blas<platform::CPUDeviceContext>::DOT(int n, const T *x, const T *y) const {

--- a/paddle/fluid/platform/dynload/mklml.h
+++ b/paddle/fluid/platform/dynload/mklml.h
@@ -74,6 +74,8 @@ extern void* mklml_dso_handle;
  __macro(vdAdd);                   \
  __macro(vsMul);                   \
  __macro(vdMul);                   \
+  __macro(vsExp);                   \
+  __macro(vdExp);                   \
  __macro(MKL_Set_Num_Threads)
 MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);