From ee2321debd803037da29656c7d6e437fdaac036b Mon Sep 17 00:00:00 2001
From: tensor-tang <tangjian03@baidu.com>
Date: Fri, 22 Feb 2019 16:33:03 +0800
Subject: [PATCH] Revert 15770 develop a6910f900 gelu mkl opt (#15872)

* Revert "Optimize Gelu with MKL Erf function (#15770)"

This reverts commit 676995c86cb4b49f9a41c7a32c5e054b16201753.

* test=develop
---
 cmake/external/mklml.cmake              |  6 ++----
 paddle/fluid/operators/activation_op.h  | 22 ----------------------
 paddle/fluid/operators/math/blas.h      |  8 --------
 paddle/fluid/operators/math/blas_impl.h | 23 -----------------------
 paddle/fluid/platform/dynload/mklml.h   |  2 --
 5 files changed, 2 insertions(+), 59 deletions(-)
diff --git a/cmake/external/mklml.cmake b/cmake/external/mklml.cmake
index 32a9368a9f..54826cedb8 100644
--- a/cmake/external/mklml.cmake
+++ b/cmake/external/mklml.cmake
@@ -39,10 +39,8 @@ IF(WIN32)
     SET(MKLML_IOMP_LIB            ${MKLML_LIB_DIR}/libiomp5md.lib)
     SET(MKLML_SHARED_LIB          ${MKLML_LIB_DIR}/mklml.dll)
     SET(MKLML_SHARED_IOMP_LIB     ${MKLML_LIB_DIR}/libiomp5md.dll)
-ELSE()
-    #TODO(intel-huying):
-    #  Now enable Erf function in mklml library temporarily, it will be updated as offical version later.
-    SET(MKLML_VER "VsErf_mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
+ELSE()  
+    SET(MKLML_VER "mklml_lnx_${TIME_VERSION}" CACHE STRING "" FORCE)
     SET(MKLML_URL "http://paddlepaddledeps.cdn.bcebos.com/${MKLML_VER}.tgz" CACHE STRING "" FORCE)
     SET(MKLML_LIB                 ${MKLML_LIB_DIR}/libmklml_intel.so)
     SET(MKLML_IOMP_LIB            ${MKLML_LIB_DIR}/libiomp5.so)
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index e8f5530b78..c7df3ea58a 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -11,7 +11,6 @@ limitations under the License. */
 
 #pragma once
 #include <glog/logging.h>
-#include <algorithm>
 #include <string>
 #include <unordered_set>
 #include <utility>
@@ -25,7 +24,6 @@ limitations under the License. */
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/detail/safe_ref.h"
-#include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/platform/float16.h"
 
 #ifdef PADDLE_WITH_MKLDNN
@@ -303,28 +301,8 @@ template <typename T>
 struct GeluFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out>
   void operator()(Device d, X x, Out out) const {
-// Because the execute or device context can not be deliver here, it keep the
-// marco for NVCC.
-#if defined(PADDLE_WITH_MKLML) && !defined(_WIN32) && !defined(__APPLE__) && \
-    !defined(__OSX__) && !defined(PADDLE_WITH_CUDA)
-    auto x_data = x.data();
-    auto out_data = out.data();
-    int n = std::min(x.size(), out.size());
-
-    std::memset(out_data, 0, n * sizeof(T));
-    math::CBlas<T>::AXPY(n, static_cast<T>(M_SQRT1_2), x_data, 1, out_data, 1);
-    math::CBlas<T>::VMERF(n, out_data, out_data, VML_LA);
-    for (int i = 0; i < n; i++) {
-      out_data[i] += static_cast<T>(1);
-    }
-    math::CBlas<T>::VMUL(n, x_data, out_data, out_data);
-    for (int i = 0; i < n; i++) {
-      out_data[i] *= static_cast<T>(0.5);
-    }
-#else
     auto temp = (x * static_cast<T>(M_SQRT1_2)).erf();
     out.device(d) = x * static_cast<T>(0.5) * (static_cast<T>(1) + temp);
-#endif
   }
 };
 
diff --git a/paddle/fluid/operators/math/blas.h b/paddle/fluid/operators/math/blas.h
index ce8109f64d..f67f57827b 100644
--- a/paddle/fluid/operators/math/blas.h
+++ b/paddle/fluid/operators/math/blas.h
@@ -184,9 +184,6 @@ class Blas {
   template <typename T>
   void VINV(int n, const T* a, T* y) const;
 
-  template <typename T>
-  void VMERF(int n, const T* a, T* y, int64_t mode) const;
-
  private:
   const DeviceContext& context_;
 };
@@ -293,11 +290,6 @@ class BlasT : private Blas<DeviceContext> {
     Base()->template VINV<T>(args...);
   }
 
-  template <typename... ARGS>
-  void VMERF(ARGS... args) const {
-    Base()->template VMERF<T>(args...);
-  }
-
  private:
   const Blas<DeviceContext>* Base() const {
     return static_cast<const Blas<DeviceContext>*>(this);
diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h
index ba995dabec..972366bc09 100644
--- a/paddle/fluid/operators/math/blas_impl.h
+++ b/paddle/fluid/operators/math/blas_impl.h
@@ -123,11 +123,6 @@ struct CBlas<float> {
   static void VINV(ARGS... args) {
     platform::dynload::vsInv(args...);
   }
-
-  template <typename... ARGS>
-  static void VMERF(ARGS... args) {
-    platform::dynload::vmsErf(args...);
-  }
 };
 
 template <>
@@ -228,11 +223,6 @@ struct CBlas<double> {
   static void VINV(ARGS... args) {
     platform::dynload::vdInv(args...);
   }
-
-  template <typename... ARGS>
-  static void VMERF(ARGS... args) {
-    platform::dynload::vmdErf(args...);
-  }
 };
 
 #else
@@ -635,19 +625,6 @@ void Blas<DeviceContext>::VINV(int n, const T *a, T *y) const {
 #endif
 }
 
-template <>
-template <typename T>
-void Blas<platform::CPUDeviceContext>::VMERF(int n, const T *a, T *y,
-                                             int64_t mode) const {
-#ifdef PADDLE_WITH_MKLML
-  CBlas<T>::VMERF(n, a, y, mode);
-#else
-  for (int i = 0; i < n; ++i) {
-    y[i] = std::erf(a[i]);
-  }
-#endif
-}
-
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h
index a5b846f500..a260cda491 100644
--- a/paddle/fluid/platform/dynload/mklml.h
+++ b/paddle/fluid/platform/dynload/mklml.h
@@ -86,8 +86,6 @@ extern void* mklml_dso_handle;
   __macro(vdPowx);                  \
   __macro(vsInv);                   \
   __macro(vdInv);                   \
-  __macro(vmsErf);                  \
-  __macro(vmdErf);                  \
   __macro(MKL_Set_Num_Threads)
 
 MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
-- 
GitLab