Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
0987f2b4
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0987f2b4
编写于
9月 28, 2018
作者:
T
tensor-tang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add vadd unit test
上级
3d928d4f
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
104 additions
and
29 deletions
+104
-29
paddle/fluid/operators/math/jit_kernel_blas.cc
paddle/fluid/operators/math/jit_kernel_blas.cc
+25
-27
paddle/fluid/operators/math/jit_kernel_test.cc
paddle/fluid/operators/math/jit_kernel_test.cc
+79
-2
未找到文件。
paddle/fluid/operators/math/jit_kernel_blas.cc
浏览文件 @
0987f2b4
...
...
@@ -75,25 +75,24 @@ namespace jit = platform::jit;
DEFINE_WITH_DTYPE(ker_key, ker_class, float, f); \
DEFINE_WITH_DTYPE(ker_key, ker_class, double, d)
// Expands macro_(isa, block) for the kGT8LT16 and kGT16 block tags only;
// kLT8, kEQ8 and kEQ16 are intentionally not included.
#define FOR_EACH_COMMON_BLOCK(macro_, isa) \
macro_(isa, kGT8LT16) macro_(isa, kGT16)
// Runs FOR_EACH_COMMON_BLOCK once per ISA tag: avx512f, avx2, avx, isa_any.
#define FOR_EACH_ISA_COMMON_BLOCK(macro_) \
FOR_EACH_COMMON_BLOCK(macro_, jit::avx512f) \
FOR_EACH_COMMON_BLOCK(macro_, jit::avx2) \
FOR_EACH_COMMON_BLOCK(macro_, jit::avx) \
FOR_EACH_COMMON_BLOCK(macro_, jit::isa_any)
// Expands macro_(isa, block) for all five block tags:
// kLT8, kEQ8, kGT8LT16, kEQ16 and kGT16. No separators are inserted
// between the expansions.
#define FOR_EACH_ALL_BLOCK(macro_, isa) \
macro_(isa, kLT8) macro_(isa, kEQ8) macro_(isa, kGT8LT16) macro_(isa, kEQ16) \
macro_(isa, kGT16)
// Runs FOR_EACH_ALL_BLOCK once per ISA tag: avx512f, avx2, avx, isa_any.
#define FOR_EACH_ISA_ALL_BLOCK(macro_) \
FOR_EACH_ALL_BLOCK(macro_, jit::avx512f) \
FOR_EACH_ALL_BLOCK(macro_, jit::avx2) \
FOR_EACH_ALL_BLOCK(macro_, jit::avx) \
FOR_EACH_ALL_BLOCK(macro_, jit::isa_any)
// Expands macro_(isa, block) for one fixed block tag across the four ISA
// tags. Expansions are separated by ';' and the last one has no trailing
// ';', so the invocation site supplies the final semicolon.
#define FOR_EACH_ISA(macro_, block) \
macro_(jit::avx512f, block); \
macro_(jit::avx2, block); \
macro_(jit::avx, block); \
macro_(jit::isa_any, block)
// Expands macro_(isa, block) for one fixed ISA tag across all five block
// tags, ';'-separated, with the final semicolon left to the caller.
#define FOR_EACH_BLOCK(macro_, isa) \
macro_(isa, kLT8); \
macro_(isa, kEQ8); \
macro_(isa, kGT8LT16); \
macro_(isa, kEQ16); \
macro_(isa, kGT16)
// Full cross product: FOR_EACH_BLOCK for each of the four ISA tags, i.e.
// macro_ is expanded for every (isa, block) pair.
#define FOR_EACH_ISA_BLOCK(macro_) \
FOR_EACH_BLOCK(macro_, jit::avx512f); \
FOR_EACH_BLOCK(macro_, jit::avx2); \
FOR_EACH_BLOCK(macro_, jit::avx); \
FOR_EACH_BLOCK(macro_, jit::isa_any)
/* VMUL JitKernel */
template
<
typename
T
,
platform
::
jit
::
cpu_isa_t
isa
,
jit_block
>
...
...
@@ -121,8 +120,8 @@ class VMulKernelImpl : public VMulKernel<T> {
platform::dynload::vdMul(n, x, y, z); \
}
FOR_EACH_ISA
_COMMON_BLOCK
(
VMUL_MKL_FLOAT
);
FOR_EACH_ISA_
ALL_
BLOCK
(
VMUL_MKL_DOUBLE
);
FOR_EACH_ISA
(
VMUL_MKL_FLOAT
,
kGT16
);
FOR_EACH_ISA_BLOCK
(
VMUL_MKL_DOUBLE
);
#endif
#define VMUL_INTRI8_FLOAT(isa) \
...
...
@@ -178,8 +177,8 @@ class VAddKernelImpl : public VAddKernel<T> {
platform::dynload::vdAdd(n, x, y, z); \
}
FOR_EACH_ISA
_COMMON_BLOCK
(
VADD_MKL_FLOAT
);
FOR_EACH_ISA_
ALL_
BLOCK
(
VADD_MKL_DOUBLE
);
FOR_EACH_ISA
(
VADD_MKL_FLOAT
,
kGT16
);
FOR_EACH_ISA_BLOCK
(
VADD_MKL_DOUBLE
);
#endif
#define VADD_INTRI8_FLOAT(isa) \
...
...
@@ -210,10 +209,9 @@ VADD_INTRI8_FLOAT(jit::avx512f);
REGISTER_BLAS_JITKERNEL
(
vmul
,
VMulKernel
);
REGISTER_BLAS_JITKERNEL
(
vadd
,
VAddKernel
);
#undef FOR_EACH_ISA_ALL_BLOCK
#undef FOR_EACH_ALL_BLOCK
#undef FOR_EACH_ISA_COMMON_BLOCK
#undef FOR_EACH_COMMON_BLOCK
#undef FOR_EACH_ISA
#undef FOR_EACH_BLOCK
#undef FOR_EACH_ISA_BLOCK
#undef REGISTER_BLAS_JITKERNEL
#undef DEFINE_WITH_DTYPE
#undef SEARCH_ISA_BLOCK
...
...
paddle/fluid/operators/math/jit_kernel_test.cc
浏览文件 @
0987f2b4
...
...
@@ -79,12 +79,10 @@ TEST(JitKernel, vmul) {
RandomVec
<
float
>
(
d
,
y
.
data
());
const
auto
&
ker
=
jit
::
KernelPool
::
Instance
().
template
Get
<
jit
::
VMulKernel
<
float
>
>
(
d
);
const
float
*
x_data
=
x
.
data
();
const
float
*
y_data
=
y
.
data
();
float
*
ztgt_data
=
ztgt
.
data
();
float
*
zref_data
=
zref
.
data
();
auto
trefs
=
GetCurrentUS
();
for
(
int
i
=
0
;
i
<
repeat
;
++
i
)
{
vmul_ref
(
d
,
x_data
,
y_data
,
zref_data
);
...
...
@@ -129,6 +127,85 @@ TEST(JitKernel, vmul) {
}
}
// Scalar reference for element-wise addition: stores x[k] + y[k] into z[k]
// for every k in [0, n). Does nothing when n <= 0.
void vadd_ref(const int n, const float* x, const float* y, float* z) {
  int idx = 0;
  while (idx < n) {
    const float sum = x[idx] + y[idx];
    z[idx] = sum;
    ++idx;
  }
}
#if defined __AVX__ || defined __AVX2__
// Adds exactly eight floats with 256-bit AVX intrinsics:
// z[0..7] = x[0..7] + y[0..7]. Unaligned loads and stores are used.
// The length argument n is not consulted by this function.
void vadd_intri8(const int n, const float* x, const float* y, float* z) {
  const __m256 lhs = _mm256_loadu_ps(x);
  const __m256 rhs = _mm256_loadu_ps(y);
  _mm256_storeu_ps(z, _mm256_add_ps(lhs, rhs));
}
#endif
#ifdef PADDLE_WITH_MKLML
// Baseline that forwards the element-wise float addition to the dynamically
// loaded vsAdd routine, passing n, x, y and z through unchanged.
void vadd_mkl(const int n, const float* x, const float* y, float* z) {
  namespace dynload = paddle::platform::dynload;
  dynload::vsAdd(n, x, y, z);
}
#endif
// Verifies the pooled VAddKernel<float> against the scalar reference
// vadd_ref for several vector lengths, and logs the average per-call time of
// the reference, the optional MKL / AVX-intrinsic baselines and the kernel.
TEST(JitKernel, vadd) {
  namespace jit = paddle::operators::math::jitkernel;
  for (int d : {7, 8, 15, 16, 30, 256, 512}) {
    std::vector<float> x(d), y(d);
    std::vector<float> zref(d), ztgt(d);
    // Scratch output for the timing-only baselines (MKL, AVX intrinsics).
    // They must not write into zref: otherwise the final comparison would
    // check the kernel against those baselines instead of the scalar
    // reference, and a shared defect would go unnoticed.
    std::vector<float> zbench(d);
    RandomVec<float>(d, x.data());
    RandomVec<float>(d, y.data());
    const auto& ker =
        jit::KernelPool::Instance().template Get<jit::VAddKernel<float>>(d);
    const float* x_data = x.data();
    const float* y_data = y.data();
    float* ztgt_data = ztgt.data();
    float* zref_data = zref.data();

    // Scalar reference: fills zref, which is the ground truth checked below.
    auto trefs = GetCurrentUS();
    for (int i = 0; i < repeat; ++i) {
      vadd_ref(d, x_data, y_data, zref_data);
    }
    auto trefe = GetCurrentUS();

#ifdef PADDLE_WITH_MKLML
    // MKL baseline, timed only; output goes to the scratch buffer.
    auto tmkls = GetCurrentUS();
    for (int i = 0; i < repeat; ++i) {
      vadd_mkl(d, x_data, y_data, zbench.data());
    }
    auto tmkle = GetCurrentUS();
#endif

#if defined __AVX__ || defined __AVX2__
    // The hand-written intrinsic handles exactly eight floats, so it is only
    // timed for d == 8; output goes to the scratch buffer.
    if (d == 8) {
      auto si0 = GetCurrentUS();
      for (int i = 0; i < repeat; ++i) {
        vadd_intri8(d, x_data, y_data, zbench.data());
      }
      auto si1 = GetCurrentUS();
      VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
    }
#endif

    // Kernel under test: fills ztgt.
    auto ttgts = GetCurrentUS();
    for (int i = 0; i < repeat; ++i) {
      ker->Compute(d, x_data, y_data, ztgt_data);
    }
    auto ttgte = GetCurrentUS();

    VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML
            << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else
            << " us, "
#endif
            << "tgt takes: " << (ttgte - ttgts) / repeat;

    // Element-wise comparison of the kernel output with the scalar reference.
    for (int i = 0; i < d; ++i) {
      EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
    }
  }
}
TEST
(
JitKernel
,
pool
)
{
namespace
jit
=
paddle
::
operators
::
math
::
jitkernel
;
const
int
frame_size
=
4
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录