/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <mkl.h>
#include <mutex>  // NOLINT

#include "paddle/fluid/platform/dynload/dynamic_loader.h"
#include "paddle/fluid/platform/port.h"

namespace paddle {
namespace platform {
namespace dynload {

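// Lazy-loading state shared by all wrappers below: mklml_dso_flag guards a
// one-time load of the MKLML shared library, whose handle is cached in
// mklml_dso_handle.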
extern std::once_flag mklml_dso_flag;
extern void *mklml_dso_handle;

/**
 * The following macro definition generates a struct
 * (one per function) that dynamically loads the corresponding
 * mklml routine via an overloaded operator().
 */
#define DYNAMIC_LOAD_MKLML_WRAP(__name)                                    \
  struct DynLoad__##__name {                                               \
    template <typename... Args>                                            \
    auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) {       \
      using mklmlFunc = decltype(&::__name);                               \
      std::call_once(mklml_dso_flag, []() {                                \
        mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
      });                                                                  \
      static void *p_##__name = dlsym(mklml_dso_handle, #__name);          \
      return reinterpret_cast<mklmlFunc>(p_##__name)(args...);             \
    }                                                                      \
  };                                                                       \
  extern DynLoad__##__name __name

#define DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) DYNAMIC_LOAD_MKLML_WRAP(__name)
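
// For illustration only: DYNAMIC_LOAD_MKLML_WRAP(cblas_sgemm) expands to
// roughly the following (DECLARE_TYPE comes from port.h and deduces the
// return type of the wrapped call):
//
//   struct DynLoad__cblas_sgemm {
//     template <typename... Args>
//     auto operator()(Args... args) -> decltype(cblas_sgemm(args...)) {
//       using mklmlFunc = decltype(&::cblas_sgemm);
//       std::call_once(mklml_dso_flag, []() {
//         mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle();
//       });
//       static void *p_cblas_sgemm = dlsym(mklml_dso_handle, "cblas_sgemm");
//       return reinterpret_cast<mklmlFunc>(p_cblas_sgemm)(args...);
//     }
//   };
//   extern DynLoad__cblas_sgemm cblas_sgemm;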

#define MKLML_ROUTINE_EACH(__macro) \
  __macro(cblas_sgemm);             \
  __macro(cblas_dgemm);             \
  __macro(cblas_cgemm);             \
  __macro(cblas_zgemm);             \
  __macro(cblas_saxpy);             \
  __macro(cblas_daxpy);             \
  __macro(cblas_caxpy);             \
  __macro(cblas_zaxpy);             \
  __macro(cblas_scopy);             \
  __macro(cblas_dcopy);             \
  __macro(cblas_ccopy);             \
  __macro(cblas_zcopy);             \
  __macro(cblas_sgemv);             \
  __macro(cblas_dgemv);             \
  __macro(cblas_cgemv);             \
  __macro(cblas_zgemv);             \
  __macro(cblas_strsm);             \
  __macro(cblas_dtrsm);             \
  __macro(cblas_ctrsm);             \
  __macro(cblas_ztrsm);             \
  __macro(cblas_sgemm_alloc);       \
  __macro(cblas_dgemm_alloc);       \
  __macro(cblas_sgemm_pack);        \
  __macro(cblas_dgemm_pack);        \
  __macro(cblas_sgemm_compute);     \
  __macro(cblas_dgemm_compute);     \
  __macro(cblas_sgemm_free);        \
  __macro(cblas_dgemm_free);        \
  __macro(cblas_sgemm_batch);       \
  __macro(cblas_dgemm_batch);       \
  __macro(cblas_cgemm_batch);       \
  __macro(cblas_zgemm_batch);       \
  __macro(cblas_sdot);              \
  __macro(cblas_ddot);              \
  __macro(cblas_sasum);             \
  __macro(cblas_dasum);             \
  __macro(cblas_isamax);            \
  __macro(cblas_idamax);            \
  __macro(cblas_sscal);             \
  __macro(cblas_dscal);             \
  __macro(vsAdd);                   \
  __macro(vdAdd);                   \
  __macro(vsSub);                   \
  __macro(vdSub);                   \
  __macro(vsMul);                   \
  __macro(vdMul);                   \
  __macro(vsDiv);                   \
  __macro(vdDiv);                   \
  __macro(vsExp);                   \
  __macro(vdExp);                   \
  __macro(vsSqr);                   \
  __macro(vdSqr);                   \
  __macro(vsPowx);                  \
  __macro(vdPowx);                  \
  __macro(vsInv);                   \
  __macro(vdInv);                   \
  __macro(vmsErf);                  \
  __macro(vmdErf);                  \
  __macro(MKL_Free_Buffers);        \
  __macro(MKL_Set_Num_Threads);     \
  __macro(MKL_Get_Max_Threads);

MKLML_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_MKLML_WRAP);
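// The invocation above generates, for each routine listed, a wrapper struct
// and an extern object of the same name, so a hypothetical call such as
//
//   paddle::platform::dynload::cblas_saxpy(n, alpha, x, 1, y, 1);
//
// behaves like ::cblas_saxpy, except that the MKLML library is loaded on
// first use and the symbol is resolved through dlsym.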

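// mkl_scsrmm / mkl_dcsrmm (sparse CSR matrix times dense matrix multiply)
// are wrapped individually and only on non-Windows builds.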
#if !defined(_WIN32)
DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm);
DYNAMIC_LOAD_MKLML_WRAP(mkl_dcsrmm);
#endif

#undef DYNAMIC_LOAD_MKLML_WRAP

}  // namespace dynload
}  // namespace platform
}  // namespace paddle