Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
4a93db92
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4a93db92
编写于
12月 05, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove jit namespace
test=develop
上级
8cda28f3
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
167 addition
and
179 deletion
+167
-179
paddle/fluid/operators/attention_lstm_op.cc
paddle/fluid/operators/attention_lstm_op.cc
+8
-8
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
+3
-3
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
...le/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
+3
-3
paddle/fluid/operators/math/cpu_vec.h
paddle/fluid/operators/math/cpu_vec.h
+72
-76
paddle/fluid/operators/math/cpu_vec_test.cc
paddle/fluid/operators/math/cpu_vec_test.cc
+30
-24
paddle/fluid/operators/math/jit_code.cc
paddle/fluid/operators/math/jit_code.cc
+1
-1
paddle/fluid/operators/math/jit_code.h
paddle/fluid/operators/math/jit_code.h
+1
-1
paddle/fluid/operators/math/jit_gen.cc
paddle/fluid/operators/math/jit_gen.cc
+1
-1
paddle/fluid/operators/math/jit_kernel.cc
paddle/fluid/operators/math/jit_kernel.cc
+0
-2
paddle/fluid/operators/math/jit_kernel_blas.cc
paddle/fluid/operators/math/jit_kernel_blas.cc
+1
-2
paddle/fluid/operators/math/jit_kernel_crf_decode.cc
paddle/fluid/operators/math/jit_kernel_crf_decode.cc
+11
-13
paddle/fluid/operators/math/jit_kernel_exp.cc
paddle/fluid/operators/math/jit_kernel_exp.cc
+0
-1
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
+10
-12
paddle/fluid/operators/math/jit_kernel_macro.h
paddle/fluid/operators/math/jit_kernel_macro.h
+18
-19
paddle/fluid/operators/math/jit_kernel_test.cc
paddle/fluid/operators/math/jit_kernel_test.cc
+1
-1
paddle/fluid/platform/cpu_info.cc
paddle/fluid/platform/cpu_info.cc
+0
-2
paddle/fluid/platform/cpu_info.h
paddle/fluid/platform/cpu_info.h
+0
-3
paddle/fluid/platform/init.cc
paddle/fluid/platform/init.cc
+7
-7
未找到文件。
paddle/fluid/operators/attention_lstm_op.cc
浏览文件 @
4a93db92
...
...
@@ -231,10 +231,10 @@ use lstm_x_t as input and compute as standard LSTM.
template
<
typename
T
>
inline
void
bias_relu
(
const
int
n
,
const
T
*
x
,
const
T
*
bias
,
T
*
y
)
{
if
(
bias
)
{
math
::
vec_add_bias
<
T
,
platform
::
jit
::
avx
>
(
n
,
*
bias
,
x
,
y
);
math
::
vec_relu
<
T
,
platform
::
jit
::
avx
>
(
n
,
y
,
y
);
math
::
vec_add_bias
<
T
,
platform
::
avx
>
(
n
,
*
bias
,
x
,
y
);
math
::
vec_relu
<
T
,
platform
::
avx
>
(
n
,
y
,
y
);
}
else
{
math
::
vec_relu
<
T
,
platform
::
jit
::
avx
>
(
n
,
x
,
y
);
math
::
vec_relu
<
T
,
platform
::
avx
>
(
n
,
x
,
y
);
}
}
...
...
@@ -245,8 +245,8 @@ inline void vec_softmax(const int n, const T* x, T* y) {
for
(
int
i
=
1
;
i
<
n
;
++
i
)
{
scalar
=
scalar
<
x
[
i
]
?
x
[
i
]
:
scalar
;
}
math
::
vec_add_bias
<
T
,
platform
::
jit
::
avx
>
(
n
,
-
scalar
,
x
,
y
);
// sub
math
::
vec_exp
<
T
>
(
n
,
y
,
y
);
// exp
math
::
vec_add_bias
<
T
,
platform
::
avx
>
(
n
,
-
scalar
,
x
,
y
);
// sub
math
::
vec_exp
<
T
>
(
n
,
y
,
y
);
// exp
// sum
scalar
=
T
(
0
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
@@ -302,13 +302,13 @@ class AttentionLSTMKernel : public framework::OpKernel<T> {
auto
&
act_gate_str
=
ctx
.
Attr
<
std
::
string
>
(
"gate_activation"
);
auto
&
act_cell_str
=
ctx
.
Attr
<
std
::
string
>
(
"cell_activation"
);
auto
&
act_cand_str
=
ctx
.
Attr
<
std
::
string
>
(
"candidate_activation"
);
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
))
{
math
::
VecActivations
<
T
,
platform
::
jit
::
avx
>
act_functor
;
if
(
platform
::
MayIUse
(
platform
::
avx
))
{
math
::
VecActivations
<
T
,
platform
::
avx
>
act_functor
;
act_gate
=
act_functor
(
act_gate_str
);
act_cell
=
act_functor
(
act_cell_str
);
act_cand
=
act_functor
(
act_cand_str
);
}
else
{
math
::
VecActivations
<
T
,
platform
::
jit
::
isa_any
>
act_functor
;
math
::
VecActivations
<
T
,
platform
::
isa_any
>
act_functor
;
act_gate
=
act_functor
(
act_gate_str
);
act_cell
=
act_functor
(
act_cell_str
);
act_cand
=
act_functor
(
act_cand_str
);
...
...
paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc
浏览文件 @
4a93db92
...
...
@@ -217,13 +217,13 @@ class FusedEmbeddingFCLSTMKernel : public framework::OpKernel<T> {
auto& act_gate_str = ctx.Attr<std::string>("gate_activation"); \
auto& act_cell_str = ctx.Attr<std::string>("cell_activation"); \
auto& act_cand_str = ctx.Attr<std::string>("candidate_activation"); \
if (platform::
jit::MayIUse(platform::jit::avx)) {
\
math::VecActivations<T, platform::
jit::avx> act_functor;
\
if (platform::
MayIUse(platform::avx)) {
\
math::VecActivations<T, platform::
avx> act_functor;
\
act_gate = act_functor(act_gate_str); \
act_cell = act_functor(act_cell_str); \
act_cand = act_functor(act_cand_str); \
} else { \
math::VecActivations<T, platform::
jit::isa_any> act_functor;
\
math::VecActivations<T, platform::
isa_any> act_functor;
\
act_gate = act_functor(act_gate_str); \
act_cell = act_functor(act_cell_str); \
act_cand = act_functor(act_cand_str); \
...
...
paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc
浏览文件 @
4a93db92
...
...
@@ -151,11 +151,11 @@ class FusionSeqExpandConcatFCOpKernel : public framework::OpKernel<T> {
std
::
function
<
void
(
const
int
,
const
T
*
,
T
*
)
>
fc_act
;
auto
&
fc_act_str
=
ctx
.
Attr
<
std
::
string
>
(
"fc_activation"
);
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
))
{
math
::
VecActivations
<
T
,
platform
::
jit
::
avx
>
act_functor
;
if
(
platform
::
MayIUse
(
platform
::
avx
))
{
math
::
VecActivations
<
T
,
platform
::
avx
>
act_functor
;
fc_act
=
act_functor
(
fc_act_str
);
}
else
{
math
::
VecActivations
<
T
,
platform
::
jit
::
isa_any
>
act_functor
;
math
::
VecActivations
<
T
,
platform
::
isa_any
>
act_functor
;
fc_act
=
act_functor
(
fc_act_str
);
}
...
...
paddle/fluid/operators/math/cpu_vec.h
浏览文件 @
4a93db92
...
...
@@ -77,7 +77,7 @@ inline void vec_scal<double>(const int n, const double a, double* x) {
#endif
// MKL scal only support inplace, choose this if src and dst are not equal
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_scal
(
const
int
n
,
const
T
a
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
a
*
x
[
i
];
...
...
@@ -85,12 +85,12 @@ inline void vec_scal(const int n, const T a, const T* x, T* y) {
}
template
<
>
inline
void
vec_scal
<
float
,
platform
::
jit
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_scal
<
float
,
platform
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_scal
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_scal
<
float
,
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -114,24 +114,24 @@ inline void vec_scal<float, platform::jit::avx>(const int n, const float a,
y
[
i
]
=
a
*
x
[
i
];
}
#else
vec_scal
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_scal
<
float
,
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_scal
<
float
,
platform
::
jit
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_scal
<
float
,
platform
::
jit
::
avx
>
(
n
,
a
,
x
,
y
);
inline
void
vec_scal
<
float
,
platform
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_scal
<
float
,
platform
::
avx
>
(
n
,
a
,
x
,
y
);
}
template
<
>
inline
void
vec_scal
<
float
,
platform
::
jit
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_scal
<
float
,
platform
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_scal
<
float
,
platform
::
jit
::
avx2
>
(
n
,
a
,
x
,
y
);
vec_scal
<
float
,
platform
::
avx2
>
(
n
,
a
,
x
,
y
);
}
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_bias_sub
(
const
int
n
,
const
T
a
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
a
-
x
[
i
];
...
...
@@ -139,12 +139,12 @@ inline void vec_bias_sub(const int n, const T a, const T* x, T* y) {
}
template
<
>
inline
void
vec_bias_sub
<
float
,
platform
::
jit
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_bias_sub
<
float
,
platform
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_bias_sub
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_bias_sub
<
float
,
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -168,27 +168,25 @@ inline void vec_bias_sub<float, platform::jit::avx>(const int n, const float a,
y
[
i
]
=
a
-
x
[
i
];
}
#else
vec_bias_sub
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_bias_sub
<
float
,
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_bias_sub
<
float
,
platform
::
jit
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_bias_sub
<
float
,
platform
::
jit
::
avx
>
(
n
,
a
,
x
,
y
);
inline
void
vec_bias_sub
<
float
,
platform
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_bias_sub
<
float
,
platform
::
avx
>
(
n
,
a
,
x
,
y
);
}
template
<
>
inline
void
vec_bias_sub
<
float
,
platform
::
jit
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_bias_sub
<
float
,
platform
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_bias_sub
<
float
,
platform
::
jit
::
avx2
>
(
n
,
a
,
x
,
y
);
vec_bias_sub
<
float
,
platform
::
avx2
>
(
n
,
a
,
x
,
y
);
}
// out = x*y + (1-x)*z
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_cross
(
const
int
n
,
const
T
*
x
,
const
T
*
y
,
const
T
*
z
,
T
*
out
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
out
[
i
]
=
x
[
i
]
*
y
[
i
]
+
(
static_cast
<
T
>
(
1
)
-
x
[
i
])
*
z
[
i
];
...
...
@@ -196,13 +194,13 @@ inline void vec_cross(const int n, const T* x, const T* y, const T* z, T* out) {
}
template
<
>
inline
void
vec_cross
<
float
,
platform
::
jit
::
avx
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
inline
void
vec_cross
<
float
,
platform
::
avx
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_cross
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
vec_cross
<
float
,
platform
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -228,25 +226,26 @@ inline void vec_cross<float, platform::jit::avx>(const int n, const float* x,
out
[
i
]
=
x
[
i
]
*
y
[
i
]
+
(
1.
f
-
x
[
i
])
*
z
[
i
];
}
#else
vec_cross
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
vec_cross
<
float
,
platform
::
isa_any
>
(
n
,
x
,
y
,
z
,
out
);
#endif
}
template
<
>
inline
void
vec_cross
<
float
,
platform
::
jit
::
avx2
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
vec_cross
<
float
,
platform
::
jit
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
inline
void
vec_cross
<
float
,
platform
::
avx2
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
vec_cross
<
float
,
platform
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
}
template
<
>
inline
void
vec_cross
<
float
,
platform
::
jit
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
inline
void
vec_cross
<
float
,
platform
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
const
float
*
y
,
const
float
*
z
,
float
*
out
)
{
// TODO(TJ): enable me
vec_cross
<
float
,
platform
::
jit
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
vec_cross
<
float
,
platform
::
avx
>
(
n
,
x
,
y
,
z
,
out
);
}
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_add_bias
(
const
int
n
,
const
T
a
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
x
[
i
]
+
a
;
...
...
@@ -254,12 +253,12 @@ inline void vec_add_bias(const int n, const T a, const T* x, T* y) {
}
template
<
>
inline
void
vec_add_bias
<
float
,
platform
::
jit
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_add_bias
<
float
,
platform
::
avx
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_add_bias
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_add_bias
<
float
,
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -283,32 +282,30 @@ inline void vec_add_bias<float, platform::jit::avx>(const int n, const float a,
y
[
i
]
=
x
[
i
]
+
a
;
}
#else
vec_add_bias
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
a
,
x
,
y
);
vec_add_bias
<
float
,
platform
::
isa_any
>
(
n
,
a
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_add_bias
<
float
,
platform
::
jit
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_add_bias
<
float
,
platform
::
jit
::
avx
>
(
n
,
a
,
x
,
y
);
inline
void
vec_add_bias
<
float
,
platform
::
avx2
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
vec_add_bias
<
float
,
platform
::
avx
>
(
n
,
a
,
x
,
y
);
}
template
<
>
inline
void
vec_add_bias
<
float
,
platform
::
jit
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_add_bias
<
float
,
platform
::
avx512f
>
(
const
int
n
,
const
float
a
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_add_bias
<
float
,
platform
::
jit
::
avx2
>
(
n
,
a
,
x
,
y
);
vec_add_bias
<
float
,
platform
::
avx2
>
(
n
,
a
,
x
,
y
);
}
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_identity
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
// do nothing
return
;
}
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_sigmoid
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
const
T
min
=
SIGMOID_THRESHOLD_MIN
;
const
T
max
=
SIGMOID_THRESHOLD_MAX
;
...
...
@@ -323,12 +320,12 @@ inline void vec_sigmoid(const int n, const T* x, T* y) {
}
template
<
>
inline
void
vec_sigmoid
<
float
,
platform
::
jit
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_sigmoid
<
float
,
platform
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
)
{
vec_sigmoid
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
x
,
y
);
vec_sigmoid
<
float
,
platform
::
isa_any
>
(
n
,
x
,
y
);
return
;
}
const
int
rest
=
n
%
block
;
...
...
@@ -377,25 +374,24 @@ inline void vec_sigmoid<float, platform::jit::avx>(const int n, const float* x,
y
[
i
]
=
1.
f
/
(
1.
f
+
y
[
i
]);
}
#else
vec_sigmoid
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
x
,
y
);
vec_sigmoid
<
float
,
platform
::
isa_any
>
(
n
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_sigmoid
<
float
,
platform
::
jit
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_sigmoid
<
float
,
platform
::
jit
::
avx
>
(
n
,
x
,
y
);
inline
void
vec_sigmoid
<
float
,
platform
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_sigmoid
<
float
,
platform
::
avx
>
(
n
,
x
,
y
);
}
template
<
>
inline
void
vec_sigmoid
<
float
,
platform
::
jit
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_sigmoid
<
float
,
platform
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_sigmoid
<
float
,
platform
::
jit
::
avx2
>
(
n
,
x
,
y
);
vec_sigmoid
<
float
,
platform
::
avx2
>
(
n
,
x
,
y
);
}
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_tanh
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
vec_scal
<
T
,
isa
>
(
n
,
static_cast
<
T
>
(
2
),
x
,
y
);
vec_sigmoid
<
T
,
isa
>
(
n
,
y
,
y
);
...
...
@@ -404,7 +400,7 @@ inline void vec_tanh(const int n, const T* x, T* y) {
}
// TODO(TJ): make relu clip
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
inline
void
vec_relu
(
const
int
n
,
const
T
*
x
,
T
*
y
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
y
[
i
]
=
x
[
i
]
>
0
?
x
[
i
]
:
0
;
...
...
@@ -412,12 +408,12 @@ inline void vec_relu(const int n, const T* x, T* y) {
}
template
<
>
inline
void
vec_relu
<
float
,
platform
::
jit
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_relu
<
float
,
platform
::
avx
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
#ifdef __AVX__
constexpr
int
block
=
YMM_FLOAT_BLOCK
;
if
(
n
<
block
*
4
)
{
vec_relu
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
x
,
y
);
vec_relu
<
float
,
platform
::
isa_any
>
(
n
,
x
,
y
);
return
;
}
...
...
@@ -441,26 +437,26 @@ inline void vec_relu<float, platform::jit::avx>(const int n, const float* x,
#undef MOVE_ONE_STEP
#else
vec_relu
<
float
,
platform
::
jit
::
isa_any
>
(
n
,
x
,
y
);
vec_relu
<
float
,
platform
::
isa_any
>
(
n
,
x
,
y
);
#endif
}
template
<
>
inline
void
vec_relu
<
float
,
platform
::
jit
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_relu
<
float
,
platform
::
jit
::
avx
>
(
n
,
x
,
y
);
inline
void
vec_relu
<
float
,
platform
::
avx2
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
vec_relu
<
float
,
platform
::
avx
>
(
n
,
x
,
y
);
}
template
<
>
inline
void
vec_relu
<
float
,
platform
::
jit
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
inline
void
vec_relu
<
float
,
platform
::
avx512f
>
(
const
int
n
,
const
float
*
x
,
float
*
y
)
{
// TODO(TJ): enable me
vec_relu
<
float
,
platform
::
jit
::
avx2
>
(
n
,
x
,
y
);
vec_relu
<
float
,
platform
::
avx2
>
(
n
,
x
,
y
);
}
// TODO(TJ): optimize double of sigmoid, tanh and relu if necessary
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
=
platform
::
jit
::
isa_any
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
=
platform
::
isa_any
>
class
VecActivations
{
public:
std
::
function
<
void
(
const
int
,
const
T
*
,
T
*
)
>
operator
()(
...
...
paddle/fluid/operators/math/cpu_vec_test.cc
浏览文件 @
4a93db92
...
...
@@ -104,38 +104,42 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
}
TEST
(
CpuVecTest
,
sigmoid
)
{
namespace
jit
=
paddle
::
platform
::
jit
;
namespace
platform
=
paddle
::
platform
;
using
namespace
paddle
::
operators
::
math
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
jit
::
avx
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
jit
::
avx2
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
jit
::
avx512f
>
,
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx2
>
,
ref_sigmoid
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx512f
>
,
ref_sigmoid
<
float
>
);
}
TestAndBench
<
double
>
(
30
,
vec_sigmoid
<
double
>
,
ref_sigmoid
<
double
>
);
}
TEST
(
CpuVecTest
,
tanh
)
{
namespace
jit
=
paddle
::
platform
::
jit
;
namespace
platform
=
paddle
::
platform
;
using
namespace
paddle
::
operators
::
math
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
jit
::
avx
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
jit
::
avx2
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
jit
::
avx512f
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx2
>
,
ref_tanh
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx512f
>
,
ref_tanh
<
float
>
);
}
TestAndBench
<
double
>
(
30
,
vec_tanh
<
double
>
,
ref_tanh
<
double
>
);
}
TEST
(
CpuVecTest
,
relu
)
{
namespace
jit
=
paddle
::
platform
::
jit
;
namespace
platform
=
paddle
::
platform
;
using
namespace
paddle
::
operators
::
math
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
jit
::
avx
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
jit
::
avx2
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
jit
::
avx512f
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx2
>
,
ref_relu
<
float
>
);
TestAndBench
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx512f
>
,
ref_relu
<
float
>
);
}
TestAndBench
<
double
>
(
30
,
vec_relu
<
double
>
,
ref_relu
<
double
>
);
}
...
...
@@ -162,38 +166,40 @@ void TestInplace(const int n, std::function<void(const int, const T*, T*)> tgt,
}
TEST
(
CpuVecTest
,
inplace_sigmoid
)
{
namespace
jit
=
paddle
::
platform
::
jit
;
namespace
platform
=
paddle
::
platform
;
using
namespace
paddle
::
operators
::
math
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
jit
::
avx
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
jit
::
avx2
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
jit
::
avx512f
>
,
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx2
>
,
ref_sigmoid
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_sigmoid
<
float
,
platform
::
avx512f
>
,
ref_sigmoid
<
float
>
);
}
TestInplace
<
double
>
(
30
,
vec_sigmoid
<
double
>
,
ref_sigmoid
<
double
>
);
}
TEST
(
CpuVecTest
,
inplace_tanh
)
{
namespace
jit
=
paddle
::
platform
::
jit
;
namespace
platform
=
paddle
::
platform
;
using
namespace
paddle
::
operators
::
math
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
jit
::
avx
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
jit
::
avx2
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
jit
::
avx512f
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx2
>
,
ref_tanh
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_tanh
<
float
,
platform
::
avx512f
>
,
ref_tanh
<
float
>
);
}
TestInplace
<
double
>
(
30
,
vec_tanh
<
double
>
,
ref_tanh
<
double
>
);
}
TEST
(
CpuVecTest
,
inplace_relu
)
{
namespace
jit
=
paddle
::
platform
::
jit
;
namespace
platform
=
paddle
::
platform
;
using
namespace
paddle
::
operators
::
math
;
// NOLINT
for
(
auto
sz
:
{
1
,
2
,
15
,
16
,
30
,
32
,
128
,
200
,
512
})
{
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
jit
::
avx
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
jit
::
avx2
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
jit
::
avx512f
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx2
>
,
ref_relu
<
float
>
);
TestInplace
<
float
>
(
sz
,
vec_relu
<
float
,
platform
::
avx512f
>
,
ref_relu
<
float
>
);
}
TestInplace
<
double
>
(
30
,
vec_relu
<
double
>
,
ref_relu
<
double
>
);
}
paddle/fluid/operators/math/jit_code.cc
浏览文件 @
4a93db92
...
...
@@ -22,7 +22,7 @@ namespace math {
namespace
jitkernel
{
namespace
gen
{
using
namespace
platform
::
jit
;
// NOLINT
using
namespace
platform
;
// NOLINT
bool
VXXJitCode
::
init
(
int
d
,
int
scalar_index
)
{
// It's not necessary to use avx512 since it would slow down the frequency
...
...
paddle/fluid/operators/math/jit_code.h
浏览文件 @
4a93db92
...
...
@@ -179,7 +179,7 @@ class VActJitCode : public JitCode {
template
<
typename
JMM
>
void
exp_jmm
(
JMM
&
dst
,
JMM
&
src
,
int
src_idx
=
11
,
int
fx_idx
=
12
,
// NOLINT
int
fy_idx
=
13
,
int
mask_idx
=
14
,
int
tmp_idx
=
15
)
{
using
namespace
platform
::
jit
;
// NOLINT
using
namespace
platform
;
// NOLINT
// check all idx can not equal
JMM
jmm_src
=
JMM
(
src_idx
);
JMM
jmm_fx
=
JMM
(
fx_idx
);
...
...
paddle/fluid/operators/math/jit_gen.cc
浏览文件 @
4a93db92
...
...
@@ -36,7 +36,7 @@ void JitCode::preCode() {
for
(
int
i
=
0
;
i
<
num_g_abi_regs
;
++
i
)
{
push
(
Xbyak
::
Reg64
(
g_abi_regs
[
i
]));
}
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx512f
))
{
if
(
platform
::
MayIUse
(
platform
::
avx512f
))
{
mov
(
reg_EVEX_max_8b_offt
,
2
*
EVEX_max_8b_offt
);
}
}
...
...
paddle/fluid/operators/math/jit_kernel.cc
浏览文件 @
4a93db92
...
...
@@ -21,8 +21,6 @@ namespace operators {
namespace
math
{
namespace
jitkernel
{
namespace
jit
=
platform
::
jit
;
KernelPool
&
KernelPool
::
Instance
()
{
static
thread_local
KernelPool
g_jit_kernels
;
return
g_jit_kernels
;
...
...
paddle/fluid/operators/math/jit_kernel_blas.cc
浏览文件 @
4a93db92
...
...
@@ -30,7 +30,6 @@ namespace paddle {
namespace
operators
{
namespace
math
{
namespace
jitkernel
{
namespace
jit
=
platform
::
jit
;
#ifdef PADDLE_WITH_MKLML
template
<
typename
T
>
...
...
@@ -125,7 +124,7 @@ bool VMulKernelImpl<float>::useJIT(int d) {
#ifdef PADDLE_WITH_MKLML
template
<
>
bool
VMulKernelImpl
<
float
>::
useMKL
(
int
d
)
{
return
jit
::
MayIUse
(
jit
::
avx512f
)
&&
d
>
512
;
return
platform
::
MayIUse
(
platform
::
avx512f
)
&&
d
>
512
;
}
template
<
>
...
...
paddle/fluid/operators/math/jit_kernel_crf_decode.cc
浏览文件 @
4a93db92
...
...
@@ -25,10 +25,8 @@ namespace operators {
namespace
math
{
namespace
jitkernel
{
namespace
jit
=
platform
::
jit
;
/* CRF Decode JitKernel */
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
,
jit_block
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
,
jit_block
>
class
CRFDecodeKernelImpl
:
public
CRFDecodeKernel
<
T
>
{
public:
explicit
CRFDecodeKernelImpl
(
int
tag_num
)
:
CRFDecodeKernel
<
T
>
()
{
...
...
@@ -101,7 +99,7 @@ class CRFDecodeKernelImpl : public CRFDecodeKernel<T> {
#define INTRIAVX_FLOAT(block) \
template <> \
CRFDecodeKernelImpl<float,
jit::avx, block>::CRFDecodeKernelImpl(
\
CRFDecodeKernelImpl<float,
platform::avx, block>::CRFDecodeKernelImpl(
\
int tag_num) \
: CRFDecodeKernel<float>() { \
this->num_ = tag_num; \
...
...
@@ -109,7 +107,7 @@ class CRFDecodeKernelImpl : public CRFDecodeKernel<T> {
this->rest_ = this->num_ % YMM_FLOAT_BLOCK; \
} \
template <> \
void CRFDecodeKernelImpl<float,
jit::avx, block>::Compute(
\
void CRFDecodeKernelImpl<float,
platform::avx, block>::Compute(
\
const int seq_len, const float* x, const float* w, float* alpha, \
int* track) const { \
INIT_ALPHA(YMM_FLOAT_BLOCK) \
...
...
@@ -204,7 +202,7 @@ class CRFDecodeKernelImpl : public CRFDecodeKernel<T> {
#define INTRIAVX512_FLOAT(block) \
template <> \
CRFDecodeKernelImpl<float,
jit::avx512f, block>::CRFDecodeKernelImpl(
\
CRFDecodeKernelImpl<float,
platform::avx512f, block>::CRFDecodeKernelImpl(
\
int tag_num) \
: CRFDecodeKernel<float>() { \
this->num_ = tag_num; \
...
...
@@ -212,7 +210,7 @@ class CRFDecodeKernelImpl : public CRFDecodeKernel<T> {
this->rest_ = this->num_ % ZMM_FLOAT_BLOCK; \
} \
template <> \
void CRFDecodeKernelImpl<float,
jit::avx512f, block>::Compute(
\
void CRFDecodeKernelImpl<float,
platform::avx512f, block>::Compute(
\
const int seq_len, const float* x, const float* w, float* alpha, \
int* track) const { \
INIT_ALPHA(ZMM_FLOAT_BLOCK) \
...
...
@@ -270,14 +268,14 @@ INTRIAVX_FLOAT(kEQ16);
INTRIAVX_FLOAT
(
kGT16
);
#endif
#ifdef __AVX2__
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kEQ8
);
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kGT8LT16
);
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kEQ16
);
INTRIAVX2_FLOAT
(
jit
::
avx2
,
kGT16
);
INTRIAVX2_FLOAT
(
platform
::
avx2
,
kEQ8
);
INTRIAVX2_FLOAT
(
platform
::
avx2
,
kGT8LT16
);
INTRIAVX2_FLOAT
(
platform
::
avx2
,
kEQ16
);
INTRIAVX2_FLOAT
(
platform
::
avx2
,
kGT16
);
#endif
#ifdef __AVX512F__
INTRIAVX2_FLOAT
(
jit
::
avx512f
,
kEQ8
);
INTRIAVX2_FLOAT
(
jit
::
avx512f
,
kGT8LT16
);
INTRIAVX2_FLOAT
(
platform
::
avx512f
,
kEQ8
);
INTRIAVX2_FLOAT
(
platform
::
avx512f
,
kGT8LT16
);
INTRIAVX512_FLOAT
(
kEQ16
);
INTRIAVX512_FLOAT
(
kGT16
);
#endif
...
...
paddle/fluid/operators/math/jit_kernel_exp.cc
浏览文件 @
4a93db92
...
...
@@ -29,7 +29,6 @@ namespace paddle {
namespace
operators
{
namespace
math
{
namespace
jitkernel
{
namespace
jit
=
platform
::
jit
;
#ifdef PADDLE_WITH_MKLML
// try to use MKL to speedup
...
...
paddle/fluid/operators/math/jit_kernel_layer_norm.cc
浏览文件 @
4a93db92
...
...
@@ -22,10 +22,8 @@ namespace operators {
namespace
math
{
namespace
jitkernel
{
namespace
jit
=
platform
::
jit
;
/* Layer Norm JitKernel */
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
,
jit_block
>
template
<
typename
T
,
platform
::
cpu_isa_t
isa
,
jit_block
>
class
LayerNormKernelImpl
:
public
LayerNormKernel
<
T
>
{
public:
explicit
LayerNormKernelImpl
(
int
right
)
:
LayerNormKernel
<
T
>
()
{
...
...
@@ -90,7 +88,7 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
this->end_ = this->num_ - this->rest_; \
} \
template <> \
void LayerNormKernelImpl<float,
jit::avx, block>::Compute(
\
void LayerNormKernelImpl<float,
platform::avx, block>::Compute(
\
float* x, float* out, float* mean, float* var, const float* scale, \
const float* bias, int height, const float epsilon) const { \
__m256 sum; \
...
...
@@ -219,16 +217,16 @@ class LayerNormKernelImpl : public LayerNormKernel<T> {
}
#ifdef __AVX__
INTRIAVX_FLOAT
(
jit
::
avx
,
kEQ8
);
INTRIAVX_FLOAT
(
jit
::
avx
,
kGT8LT16
);
INTRIAVX_FLOAT
(
jit
::
avx
,
kEQ16
);
INTRIAVX_FLOAT
(
jit
::
avx
,
kGT16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kEQ8
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx
,
kGT16
);
#endif
#ifdef __AVX2__
INTRIAVX_FLOAT
(
jit
::
avx2
,
kEQ8
);
INTRIAVX_FLOAT
(
jit
::
avx2
,
kGT8LT16
);
INTRIAVX_FLOAT
(
jit
::
avx2
,
kEQ16
);
INTRIAVX_FLOAT
(
jit
::
avx2
,
kGT16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ8
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT8LT16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kEQ16
);
INTRIAVX_FLOAT
(
platform
::
avx2
,
kGT16
);
#endif
#undef INTRIAVX_FLOAT
...
...
paddle/fluid/operators/math/jit_kernel_macro.h
浏览文件 @
4a93db92
...
...
@@ -92,7 +92,6 @@ namespace jitkernel {
JITKERNEL_DECLARE, JITKERNEL_FIND_KEY, \
JITKERNEL_IMPL)
namespace
jit
=
platform
::
jit
;
// TODO(TJ): below defines are deprecated, would be remove recently
#define SEARCH_BLOCK(macro_, ker, dtype, isa) \
if (d < YMM_FLOAT_BLOCK) { \
...
...
@@ -107,15 +106,15 @@ namespace jit = platform::jit;
macro_(ker, dtype, isa, kGT16); \
}
#define SEARCH_ISA_BLOCK(macro_, ker, dtype) \
if (
jit::MayIUse(jit::avx512f)) {
\
SEARCH_BLOCK(macro_, ker, dtype,
jit
::avx512f); \
} else if (
jit::MayIUse(jit::avx2)) {
\
SEARCH_BLOCK(macro_, ker, dtype,
jit
::avx2); \
} else if (
jit::MayIUse(jit::avx)) {
\
SEARCH_BLOCK(macro_, ker, dtype,
jit
::avx); \
} else { \
SEARCH_BLOCK(macro_, ker, dtype,
jit
::isa_any); \
#define SEARCH_ISA_BLOCK(macro_, ker, dtype)
\
if (
platform::MayIUse(platform::avx512f)) {
\
SEARCH_BLOCK(macro_, ker, dtype,
platform
::avx512f); \
} else if (
platform::MayIUse(platform::avx2)) {
\
SEARCH_BLOCK(macro_, ker, dtype,
platform
::avx2); \
} else if (
platform::MayIUse(platform::avx)) {
\
SEARCH_BLOCK(macro_, ker, dtype,
platform
::avx); \
} else {
\
SEARCH_BLOCK(macro_, ker, dtype,
platform
::isa_any); \
}
#define JITKERNEL_KEY(ker_key, dtype_key) \
...
...
@@ -156,10 +155,10 @@ namespace jit = platform::jit;
marco_declare, macro_key, macro_impl)
#define FOR_EACH_ISA(macro_, block) \
macro_(
jit::avx512f, block);
\
macro_(
jit::avx2, block);
\
macro_(
jit::avx, block);
\
macro_(
jit
::isa_any, block)
macro_(
platform::avx512f, block);
\
macro_(
platform::avx2, block);
\
macro_(
platform::avx, block);
\
macro_(
platform
::isa_any, block)
#define FOR_EACH_BLOCK(macro_, isa) \
macro_(isa, kLT8); \
...
...
@@ -168,11 +167,11 @@ namespace jit = platform::jit;
macro_(isa, kEQ16); \
macro_(isa, kGT16)
#define FOR_EACH_ISA_BLOCK(macro_) \
FOR_EACH_BLOCK(macro_,
jit
::avx512f); \
FOR_EACH_BLOCK(macro_,
jit
::avx2); \
FOR_EACH_BLOCK(macro_,
jit
::avx); \
FOR_EACH_BLOCK(macro_,
jit
::isa_any)
#define FOR_EACH_ISA_BLOCK(macro_)
\
FOR_EACH_BLOCK(macro_,
platform
::avx512f); \
FOR_EACH_BLOCK(macro_,
platform
::avx2); \
FOR_EACH_BLOCK(macro_,
platform
::avx); \
FOR_EACH_BLOCK(macro_,
platform
::isa_any)
}
// namespace jitkernel
}
// namespace math
...
...
paddle/fluid/operators/math/jit_kernel_test.cc
浏览文件 @
4a93db92
...
...
@@ -705,7 +705,7 @@ TEST(JitKernel, pool) {
jit
::
lstm_attr_t
attr
(
frame_size
,
act_gate
,
act_cand
,
act_cell
,
false
);
// empty call it to avoid unknown flag 'use_pinned_memory' on Mac
paddle
::
platform
::
jit
::
MayIUse
(
paddle
::
platform
::
jit
::
avx
);
paddle
::
platform
::
MayIUse
(
paddle
::
platform
::
avx
);
const
auto
&
plstm1
=
jit
::
KernelPool
::
Instance
()
.
template
Get
<
jit
::
LSTMKernel
<
float
>,
const
jit
::
lstm_attr_t
&>
(
attr
);
...
...
paddle/fluid/platform/cpu_info.cc
浏览文件 @
4a93db92
...
...
@@ -123,7 +123,6 @@ size_t CUDAPinnedMaxChunkSize() {
return
CUDAPinnedMaxAllocSize
()
/
256
;
}
namespace
jit
{
#ifdef PADDLE_WITH_XBYAK
static
Xbyak
::
util
::
Cpu
cpu
;
bool
MayIUse
(
const
cpu_isa_t
cpu_isa
)
{
...
...
@@ -165,6 +164,5 @@ bool MayIUse(const cpu_isa_t cpu_isa) {
}
#endif
}
// namespace jit
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/cpu_info.h
浏览文件 @
4a93db92
...
...
@@ -39,7 +39,6 @@ size_t CUDAPinnedMinChunkSize();
//! Get the maximum chunk size for buddy allocator.
size_t
CUDAPinnedMaxChunkSize
();
namespace
jit
{
typedef
enum
{
isa_any
,
sse42
,
...
...
@@ -55,7 +54,5 @@ typedef enum {
// May I use some instruction
bool
MayIUse
(
const
cpu_isa_t
cpu_isa
);
}
// namespace jit
}
// namespace platform
}
// namespace paddle
paddle/fluid/platform/init.cc
浏览文件 @
4a93db92
...
...
@@ -116,7 +116,7 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
#endif
#if !defined(_WIN32) && !defined(__APPLE__) && !defined(__OSX__)
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
))
{
if
(
platform
::
MayIUse
(
platform
::
avx
))
{
#ifndef __AVX__
LOG
(
WARNING
)
<<
"AVX is available, Please re-compile on local machine"
;
#endif
...
...
@@ -131,10 +131,10 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
" version or compile from source code."
#ifdef __AVX512F__
if
(
!
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx512f
))
{
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx2
))
{
if
(
!
platform
::
MayIUse
(
platform
::
avx512f
))
{
if
(
platform
::
MayIUse
(
platform
::
avx2
))
{
AVX_GUIDE
(
AVX512
,
AVX2
);
}
else
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
))
{
}
else
if
(
platform
::
MayIUse
(
platform
::
avx
))
{
AVX_GUIDE
(
AVX512
,
AVX
);
}
else
{
AVX_GUIDE
(
AVX512
,
NonAVX
);
...
...
@@ -143,8 +143,8 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
#endif
#ifdef __AVX2__
if
(
!
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx2
))
{
if
(
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
))
{
if
(
!
platform
::
MayIUse
(
platform
::
avx2
))
{
if
(
platform
::
MayIUse
(
platform
::
avx
))
{
AVX_GUIDE
(
AVX2
,
AVX
);
}
else
{
AVX_GUIDE
(
AVX2
,
NonAVX
);
...
...
@@ -153,7 +153,7 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
#endif
#ifdef __AVX__
if
(
!
platform
::
jit
::
MayIUse
(
platform
::
jit
::
avx
))
{
if
(
!
platform
::
MayIUse
(
platform
::
avx
))
{
AVX_GUIDE
(
AVX
,
NonAVX
);
}
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录