Commit 3dd66390 authored by tensor-tang

add blas vexp

Parent 0ec1f65c
@@ -149,6 +149,9 @@ class Blas {
   template <typename T>
   void VCOPY(int n, const T* x, T* y) const;

+  template <typename T>
+  void VEXP(int n, const T* x, T* y) const;
+
   template <typename T>
   void GEMV(bool trans_a, int M, int N, T alpha, const T* A, const T* B, T beta,
             T* C) const;
...
@@ -97,6 +97,11 @@ struct CBlas<float> {
   static void VMUL(ARGS... args) {
     platform::dynload::vsMul(args...);
   }
+
+  template <typename... ARGS>
+  static void VEXP(ARGS... args) {
+    platform::dynload::vsExp(args...);
+  }
 };

 template <>
@@ -172,6 +177,11 @@ struct CBlas<double> {
   static void VMUL(ARGS... args) {
     platform::dynload::vdMul(args...);
   }
+
+  template <typename... ARGS>
+  static void VEXP(ARGS... args) {
+    platform::dynload::vdExp(args...);
+  }
 };

 #else
@@ -230,6 +240,7 @@ struct CBlas<platform::float16> {
     PADDLE_THROW("float16 SMM_GEMM not supported on CPU");
   }
   static void VMUL(...) { PADDLE_THROW("float16 VMUL not supported on CPU"); }
+  static void VEXP(...) { PADDLE_THROW("float16 VEXP not supported on CPU"); }
   static void DOT(...) { PADDLE_THROW("float16 DOT not supported on CPU"); };
   static void SCAL(...) { PADDLE_THROW("float16 SCAL not supported on CPU"); };
 #ifdef PADDLE_WITH_MKLML
@@ -374,6 +385,19 @@ void Blas<platform::CPUDeviceContext>::VMUL(int n, const T *x, const T *y,
 #endif
 }

+template <>
+template <typename T>
+void Blas<platform::CPUDeviceContext>::VEXP(int n, const T *x, T *y) const {
+#ifdef PADDLE_WITH_MKLML
+  CBlas<T>::VEXP(n, x, y);
+#else
+  // TODO: check whether OpenBLAS provides a vectorized exp we could call here.
+  for (int i = 0; i < n; ++i) {
+    y[i] = std::exp(x[i]);
+  }
+#endif
+}
+
 template <>
 template <typename T>
 T Blas<platform::CPUDeviceContext>::DOT(int n, const T *x, const T *y) const {
...
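For reference, here is a standalone sketch of the contract the new routine provides on the non-MKLML path: y[i] = exp(x[i]) for i in [0, n). Everything below (vexp_reference, the main driver) is illustrative only and not part of this commit:

#include <cassert>
#include <cmath>
#include <vector>

// Scalar reference for VEXP's semantics; with PADDLE_WITH_MKLML defined,
// the real implementation forwards to MKL's vsExp/vdExp instead.
template <typename T>
void vexp_reference(int n, const T* x, T* y) {
  for (int i = 0; i < n; ++i) {
    y[i] = std::exp(x[i]);
  }
}

int main() {
  std::vector<float> x = {0.0f, 1.0f, -1.0f};
  std::vector<float> y(x.size());
  vexp_reference(static_cast<int>(x.size()), x.data(), y.data());
  assert(std::fabs(y[0] - 1.0f) < 1e-6f);  // exp(0) == 1
  return 0;
}

A kernel would reach this through the Blas wrapper declared above, e.g. a call of the form blas.VEXP(n, in_ptr, out_ptr) on a CPU device context.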
@@ -74,6 +74,8 @@ extern void* mklml_dso_handle;
   __macro(vdAdd); \
   __macro(vsMul); \
   __macro(vdMul); \
+  __macro(vsExp); \
+  __macro(vdExp); \
   __macro(MKL_Set_Num_Threads)

 MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
...
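The two new __macro entries expose platform::dynload::vsExp and platform::dynload::vdExp as lazily resolved wrappers around the MKLML shared library. A much-simplified sketch of the idea behind such a wrapper (illustrative only; the actual DECLARE_DYNAMIC_LOAD_MKLML_WRAP expansion also handles loading and caching the library handle):

#include <dlfcn.h>

// MKL's vsExp computes r[i] = exp(a[i]); under LP64 conventions MKL_INT is int.
typedef void (*vsExpFn)(int n, const float* a, float* r);

// Resolve the symbol from the already-loaded MKLML shared object, cache the
// function pointer, and forward the call.
void call_vsExp(void* mklml_dso_handle, int n, const float* a, float* r) {
  static vsExpFn fn =
      reinterpret_cast<vsExpFn>(dlsym(mklml_dso_handle, "vsExp"));
  fn(n, a, r);
}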