/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <mkl.h>
#include <mutex>  // NOLINT

#include "paddle/pten/backends/dynload/mklml.h"

namespace paddle {
namespace platform {
namespace dynload {

/**
 * The following macro definition can generate structs
 * (for each function) to dynamically load mklml routines
 * via operator overloading.
 */
#define DYNAMIC_LOAD_MKLML_WRAP(__name)                       \
  using DynLoad__##__name = pten::dynload::DynLoad__##__name; \
  extern DynLoad__##__name __name

#define PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP(__name) \
  DYNAMIC_LOAD_MKLML_WRAP(__name)
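
// Expansion sketch (illustrative only; the actual forwarding logic lives in
// the DynLoad__ structs declared in paddle/pten/backends/dynload/mklml.h):
//
//   DYNAMIC_LOAD_MKLML_WRAP(cblas_sgemm) expands to
//
//     using DynLoad__cblas_sgemm = pten::dynload::DynLoad__cblas_sgemm;
//     extern DynLoad__cblas_sgemm cblas_sgemm;
//
//   so a call such as
//
//     paddle::platform::dynload::cblas_sgemm(CblasRowMajor, CblasNoTrans,
//                                            CblasNoTrans, M, N, K, alpha,
//                                            A, lda, B, ldb, beta, C, ldc);
//
//   goes through the functor's overloaded operator(), which resolves the
//   symbol from the mklml shared library at runtime.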

#define MKLML_ROUTINE_EACH(__macro) \
  __macro(cblas_sgemm);             \
  __macro(cblas_dgemm);             \
  __macro(cblas_cgemm);             \
  __macro(cblas_zgemm);             \
  __macro(cblas_saxpy);             \
  __macro(cblas_daxpy);             \
  __macro(cblas_caxpy);             \
  __macro(cblas_zaxpy);             \
  __macro(cblas_scopy);             \
  __macro(cblas_dcopy);             \
  __macro(cblas_ccopy);             \
  __macro(cblas_zcopy);             \
  __macro(cblas_sgemv);             \
  __macro(cblas_dgemv);             \
  __macro(cblas_cgemv);             \
  __macro(cblas_zgemv);             \
  __macro(cblas_strsm);             \
  __macro(cblas_dtrsm);             \
  __macro(cblas_ctrsm);             \
  __macro(cblas_ztrsm);             \
  __macro(cblas_sgemm_alloc);       \
  __macro(cblas_dgemm_alloc);       \
  __macro(cblas_sgemm_pack);        \
  __macro(cblas_dgemm_pack);        \
  __macro(cblas_sgemm_compute);     \
  __macro(cblas_dgemm_compute);     \
  __macro(cblas_sgemm_free);        \
  __macro(cblas_dgemm_free);        \
  __macro(cblas_sgemm_batch);       \
  __macro(cblas_dgemm_batch);       \
  __macro(cblas_cgemm_batch);       \
  __macro(cblas_zgemm_batch);       \
  __macro(cblas_sdot);              \
  __macro(cblas_ddot);              \
  __macro(cblas_sasum);             \
  __macro(cblas_dasum);             \
  __macro(cblas_isamax);            \
  __macro(cblas_idamax);            \
  __macro(cblas_sscal);             \
  __macro(cblas_dscal);             \
  __macro(vsAdd);                   \
  __macro(vdAdd);                   \
  __macro(vsSub);                   \
  __macro(vdSub);                   \
  __macro(vsMul);                   \
  __macro(vdMul);                   \
  __macro(vsDiv);                   \
  __macro(vdDiv);                   \
  __macro(vsExp);                   \
  __macro(vdExp);                   \
  __macro(vsSqr);                   \
  __macro(vdSqr);                   \
  __macro(vsPowx);                  \
  __macro(vdPowx);                  \
  __macro(vsInv);                   \
  __macro(vdInv);                   \
  __macro(vmsErf);                  \
  __macro(vmdErf);                  \
  __macro(MKL_Free_Buffers);        \
  __macro(MKL_Set_Num_Threads);     \
  __macro(MKL_Get_Max_Threads);

MKLML_ROUTINE_EACH(PLATFORM_DECLARE_DYNAMIC_LOAD_MKLML_WRAP);

#if !defined(_WIN32)
DYNAMIC_LOAD_MKLML_WRAP(mkl_scsrmm);
DYNAMIC_LOAD_MKLML_WRAP(mkl_dcsrmm);
#endif

#undef DYNAMIC_LOAD_MKLML_WRAP
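
// Usage sketch from a hypothetical caller; the include path and the dynamic
// linkage of libmklml are assumptions, not guaranteed by this header:
//
//   #include "paddle/fluid/platform/dynload/mklml.h"
//
//   void Saxpy(int n, float a, const float* x, float* y) {
//     // y = a * x + y, dispatched to the dynamically loaded mklml symbol.
//     paddle::platform::dynload::cblas_saxpy(n, a, x, 1, y, 1);
//   }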

}  // namespace dynload
}  // namespace platform
}  // namespace paddle