Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
266e625d
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
266e625d
编写于
1月 21, 2019
作者:
T
tensor-tang
提交者:
GitHub
1月 21, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #15399 from tensor-tang/refine/seqpool/fc
fix cpu jitkernel test and refine benchmark test
上级
885c4e57
316e44b1
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
118 addition
and
43 deletion
+118
-43
paddle/fluid/operators/jit/benchmark.cc
paddle/fluid/operators/jit/benchmark.cc
+98
-33
paddle/fluid/operators/jit/test.cc
paddle/fluid/operators/jit/test.cc
+20
-10
未找到文件。
paddle/fluid/operators/jit/benchmark.cc
浏览文件 @
266e625d
...
@@ -22,10 +22,54 @@
...
@@ -22,10 +22,54 @@
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/device_tracer.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/platform/variant.h" // for UNUSED
DEFINE_int32
(
burning
,
10
,
"Burning times."
);
DEFINE_int32
(
burning
,
10
,
"Burning times."
);
DEFINE_int32
(
repeat
,
3000
,
"Repeat times."
);
DEFINE_int32
(
repeat
,
3000
,
"Repeat times."
);
DEFINE_int32
(
max_size
,
1000
,
"The Max size would be tested."
);
DEFINE_int32
(
max_size
,
1000
,
"The Max size would be tested."
);
DEFINE_string
(
filter
,
""
,
"The Benchmark name would be run."
);
class
BenchJITKernel
{
public:
BenchJITKernel
()
=
default
;
virtual
~
BenchJITKernel
()
=
default
;
virtual
void
Run
()
=
0
;
virtual
const
char
*
Name
()
=
0
;
virtual
const
char
*
Dtype
()
=
0
;
virtual
const
char
*
Place
()
=
0
;
};
static
std
::
vector
<
BenchJITKernel
*>
g_all_benchmarks
;
BenchJITKernel
*
InsertBenchmark
(
BenchJITKernel
*
b
)
{
g_all_benchmarks
.
push_back
(
b
);
return
b
;
}
#define BENCH_JITKERNEL(name, dtype, place) \
class BenchJITKernel_##name##_##dtype##_##place##_ : public BenchJITKernel { \
public: \
const char* Name() override { return #name; } \
const char* Dtype() override { return #dtype; } \
const char* Place() override { return #place; } \
void Run() override; \
}; \
static auto inserted_##name##_##dtype##_##place##_ UNUSED = \
InsertBenchmark(new BenchJITKernel_##name##_##dtype##_##place##_()); \
void BenchJITKernel_##name##_##dtype##_##place##_::Run()
#define BENCH_FP32_CPU(name) BENCH_JITKERNEL(name, FP32, CPU)
void
RUN_ALL_BENCHMARK
()
{
for
(
auto
p
:
g_all_benchmarks
)
{
if
(
!
FLAGS_filter
.
empty
()
&&
FLAGS_filter
!=
p
->
Name
())
{
continue
;
}
LOG
(
INFO
)
<<
"Benchmark "
<<
p
->
Name
()
<<
"."
<<
p
->
Dtype
()
<<
"."
<<
p
->
Place
();
p
->
Run
();
}
}
template
<
typename
T
>
template
<
typename
T
>
void
RandomVec
(
const
int
n
,
T
*
a
,
const
T
lower
=
static_cast
<
T
>
(
-
20.
f
),
void
RandomVec
(
const
int
n
,
T
*
a
,
const
T
lower
=
static_cast
<
T
>
(
-
20.
f
),
...
@@ -228,49 +272,70 @@ void BenchMatMulKernel() {
...
@@ -228,49 +272,70 @@ void BenchMatMulKernel() {
}
}
}
}
using
T
=
float
;
using
PlaceType
=
paddle
::
platform
::
CPUPlace
;
// xyzn
BENCH_FP32_CPU
(
kVMul
)
{
BenchXYZNKernel
<
jit
::
kVMul
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVAdd
)
{
BenchXYZNKernel
<
jit
::
kVAdd
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVAddRelu
)
{
BenchXYZNKernel
<
jit
::
kVAddRelu
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVSub
)
{
BenchXYZNKernel
<
jit
::
kVSub
,
T
,
PlaceType
>
();
}
// axyn
BENCH_FP32_CPU
(
kVScal
)
{
BenchAXYNKernel
<
jit
::
kVScal
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVAddBias
)
{
BenchAXYNKernel
<
jit
::
kVAddBias
,
T
,
PlaceType
>
();
}
// xyn
BENCH_FP32_CPU
(
kVRelu
)
{
BenchXYNKernel
<
jit
::
kVRelu
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVIdentity
)
{
BenchXYNKernel
<
jit
::
kVIdentity
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVSquare
)
{
BenchXYNKernel
<
jit
::
kVSquare
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVExp
)
{
BenchXYNKernel
<
jit
::
kVExp
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVSigmoid
)
{
BenchXYNKernel
<
jit
::
kVSigmoid
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kVTanh
)
{
BenchXYNKernel
<
jit
::
kVTanh
,
T
,
PlaceType
>
();
}
// lstm and peephole
BENCH_FP32_CPU
(
kLSTMCtHt
)
{
BenchLSTMKernel
<
jit
::
kLSTMCtHt
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kLSTMC1H1
)
{
BenchLSTMKernel
<
jit
::
kLSTMC1H1
,
T
,
PlaceType
>
();
}
// gru functions
BENCH_FP32_CPU
(
kGRUH1
)
{
BenchGRUKernel
<
jit
::
kGRUH1
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kGRUHtPart1
)
{
BenchGRUKernel
<
jit
::
kGRUHtPart1
,
T
,
PlaceType
>
();
}
BENCH_FP32_CPU
(
kGRUHtPart2
)
{
BenchGRUKernel
<
jit
::
kGRUHtPart2
,
T
,
PlaceType
>
();
}
// seq pool function
BENCH_FP32_CPU
(
kSeqPool
)
{
BenchSeqPoolKernel
<
jit
::
kSeqPool
,
T
,
PlaceType
>
();
}
// matmul
BENCH_FP32_CPU
(
kMatMul
)
{
BenchMatMulKernel
<
jit
::
kMatMul
,
T
,
PlaceType
>
();
}
// Benchmark all jit kernels including jitcode, mkl and refer.
// Benchmark all jit kernels including jitcode, mkl and refer.
// To use this tool, run command: ./benchmark [options...]
// To use this tool, run command: ./benchmark [options...]
// Options:
// Options:
// --burning: the burning time before count
// --burning: the burning time before count
// --repeat: the repeat times
// --repeat: the repeat times
// --max_size: the max size would be tested
// --max_size: the max size would be tested
// --filter: the bench name would be run
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
gflags
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
google
::
InitGoogleLogging
(
argv
[
0
]);
google
::
InitGoogleLogging
(
argv
[
0
]);
LOG
(
INFO
)
<<
"Burning "
<<
FLAGS_burning
<<
" times, Repeat "
<<
FLAGS_repeat
LOG
(
INFO
)
<<
"Burning "
<<
FLAGS_burning
<<
" times, Repeat "
<<
FLAGS_repeat
<<
" times."
;
<<
" times."
;
using
T
=
float
;
using
PlaceType
=
paddle
::
platform
::
CPUPlace
;
// xyzn
BenchXYZNKernel
<
jit
::
kVMul
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
kVAdd
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
kVAddRelu
,
T
,
PlaceType
>
();
BenchXYZNKernel
<
jit
::
kVSub
,
T
,
PlaceType
>
();
// axyn
BenchAXYNKernel
<
jit
::
kVScal
,
T
,
PlaceType
>
();
BenchAXYNKernel
<
jit
::
kVAddBias
,
T
,
PlaceType
>
();
// xyn
BenchXYNKernel
<
jit
::
kVRelu
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVIdentity
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVSquare
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVExp
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVSigmoid
,
T
,
PlaceType
>
();
BenchXYNKernel
<
jit
::
kVTanh
,
T
,
PlaceType
>
();
// lstm and peephole
BenchLSTMKernel
<
jit
::
kLSTMCtHt
,
T
,
PlaceType
>
();
BenchLSTMKernel
<
jit
::
kLSTMC1H1
,
T
,
PlaceType
>
();
// gru functions
BenchGRUKernel
<
jit
::
kGRUH1
,
T
,
PlaceType
>
();
BenchGRUKernel
<
jit
::
kGRUHtPart1
,
T
,
PlaceType
>
();
BenchGRUKernel
<
jit
::
kGRUHtPart2
,
T
,
PlaceType
>
();
// seq pool function
BenchSeqPoolKernel
<
jit
::
kSeqPool
,
T
,
PlaceType
>
();
// matmul
RUN_ALL_BENCHMARK
();
BenchMatMulKernel
<
jit
::
kMatMul
,
T
,
PlaceType
>
();
}
}
paddle/fluid/operators/jit/test.cc
浏览文件 @
266e625d
...
@@ -22,6 +22,8 @@
...
@@ -22,6 +22,8 @@
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/cpu_info.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
static
double
acc
=
1e-5
;
template
<
typename
T
>
template
<
typename
T
>
void
RandomVec
(
const
int
n
,
T
*
a
,
const
T
lower
=
static_cast
<
T
>
(
-
20.
f
),
void
RandomVec
(
const
int
n
,
T
*
a
,
const
T
lower
=
static_cast
<
T
>
(
-
20.
f
),
const
T
upper
=
static_cast
<
T
>
(
20.
f
))
{
const
T
upper
=
static_cast
<
T
>
(
20.
f
))
{
...
@@ -37,7 +39,7 @@ template <typename T>
...
@@ -37,7 +39,7 @@ template <typename T>
void
ExpectEQ
(
const
T
*
target
,
const
T
*
refer
,
int
n
)
{
void
ExpectEQ
(
const
T
*
target
,
const
T
*
refer
,
int
n
)
{
if
(
std
::
is_floating_point
<
T
>::
value
)
{
if
(
std
::
is_floating_point
<
T
>::
value
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_NEAR
(
target
[
i
],
refer
[
i
],
1e-5
);
EXPECT_NEAR
(
target
[
i
],
refer
[
i
],
acc
);
}
}
}
else
{
}
else
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
@@ -62,7 +64,9 @@ namespace jit = paddle::operators::jit;
...
@@ -62,7 +64,9 @@ namespace jit = paddle::operators::jit;
template
<
typename
KernelTuples
,
typename
...
Args
>
template
<
typename
KernelTuples
,
typename
...
Args
>
struct
TestFuncWithRefer
{
struct
TestFuncWithRefer
{
void
operator
()(
const
typename
KernelTuples
::
func_type
tgt
,
Args
...
args
)
{}
void
operator
()(
const
typename
KernelTuples
::
func_type
tgt
,
Args
...
args
)
{
LOG
(
FATAL
)
<<
"Should specify this function."
;
}
};
};
template
<
typename
T
>
template
<
typename
T
>
...
@@ -140,7 +144,8 @@ struct TestFuncWithRefer<jit::XYNTuples<T>, std::vector<T>, std::vector<T>> {
...
@@ -140,7 +144,8 @@ struct TestFuncWithRefer<jit::XYNTuples<T>, std::vector<T>, std::vector<T>> {
template
<
typename
T
>
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
LSTMTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
struct
TestFuncWithRefer
<
jit
::
LSTMTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>>
{
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
typename
jit
::
LSTMTuples
<
T
>::
attr_type
>
{
void
operator
()(
const
typename
jit
::
LSTMTuples
<
T
>::
func_type
tgt
,
void
operator
()(
const
typename
jit
::
LSTMTuples
<
T
>::
func_type
tgt
,
const
std
::
vector
<
T
>&
xsrc
,
const
std
::
vector
<
T
>&
wp
,
const
std
::
vector
<
T
>&
xsrc
,
const
std
::
vector
<
T
>&
wp
,
const
std
::
vector
<
T
>&
ct_1
,
const
std
::
vector
<
T
>&
ct_ref
,
const
std
::
vector
<
T
>&
ct_1
,
const
std
::
vector
<
T
>&
ct_ref
,
...
@@ -185,7 +190,8 @@ struct TestFuncWithRefer<jit::LSTMTuples<T>, std::vector<T>, std::vector<T>,
...
@@ -185,7 +190,8 @@ struct TestFuncWithRefer<jit::LSTMTuples<T>, std::vector<T>, std::vector<T>,
template
<
typename
T
>
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
GRUTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
struct
TestFuncWithRefer
<
jit
::
GRUTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>>
{
std
::
vector
<
T
>
,
typename
jit
::
GRUTuples
<
T
>::
attr_type
>
{
void
operator
()(
const
typename
jit
::
GRUTuples
<
T
>::
func_type
tgt
,
void
operator
()(
const
typename
jit
::
GRUTuples
<
T
>::
func_type
tgt
,
const
std
::
vector
<
T
>&
xsrc
,
const
std
::
vector
<
T
>&
ht_1
,
const
std
::
vector
<
T
>&
xsrc
,
const
std
::
vector
<
T
>&
ht_1
,
const
std
::
vector
<
T
>&
ht_ref
,
const
std
::
vector
<
T
>&
ht_ref
,
...
@@ -212,8 +218,8 @@ struct TestFuncWithRefer<jit::GRUTuples<T>, std::vector<T>, std::vector<T>,
...
@@ -212,8 +218,8 @@ struct TestFuncWithRefer<jit::GRUTuples<T>, std::vector<T>, std::vector<T>,
};
};
template
<
typename
T
>
template
<
typename
T
>
struct
TestFuncWithRefer
<
jit
::
SeqPoolTuples
<
T
>
,
std
::
vector
<
T
>
,
struct
TestFuncWithRefer
<
jit
::
SeqPoolTuples
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
,
std
::
vector
<
T
>
>
{
typename
jit
::
SeqPoolTuples
<
T
>::
attr_type
>
{
void
operator
()(
const
typename
jit
::
SeqPoolTuples
<
T
>::
func_type
tgt
,
void
operator
()(
const
typename
jit
::
SeqPoolTuples
<
T
>::
func_type
tgt
,
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
T
>&
yref
,
const
std
::
vector
<
T
>&
x
,
const
std
::
vector
<
T
>&
yref
,
const
typename
jit
::
SeqPoolTuples
<
T
>::
attr_type
&
attr
)
{
const
typename
jit
::
SeqPoolTuples
<
T
>::
attr_type
&
attr
)
{
...
@@ -385,8 +391,8 @@ void TestLSTMKernel() {
...
@@ -385,8 +391,8 @@ void TestLSTMKernel() {
std
::
vector
<
T
>
xsrc
(
4
*
d
),
wp
(
3
*
d
),
ct_1
(
d
);
std
::
vector
<
T
>
xsrc
(
4
*
d
),
wp
(
3
*
d
),
ct_1
(
d
);
std
::
vector
<
T
>
ct_ref
(
d
),
ht_ref
(
d
),
checked
(
2
*
d
);
std
::
vector
<
T
>
ct_ref
(
d
),
ht_ref
(
d
),
checked
(
2
*
d
);
RandomVec
<
T
>
(
4
*
d
,
xsrc
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
4
*
d
,
xsrc
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
3
*
d
,
wp
.
data
(),
-
2.
f
,
2
.
f
);
RandomVec
<
T
>
(
3
*
d
,
wp
.
data
(),
-
1.
f
,
1
.
f
);
RandomVec
<
T
>
(
d
,
ct_1
.
data
(),
-
2.
f
,
2
.
f
);
RandomVec
<
T
>
(
d
,
ct_1
.
data
(),
-
1.
f
,
1
.
f
);
// x could be changed after compute, so copy to save src
// x could be changed after compute, so copy to save src
std
::
vector
<
T
>
x
(
xsrc
.
size
());
std
::
vector
<
T
>
x
(
xsrc
.
size
());
std
::
copy
(
xsrc
.
begin
(),
xsrc
.
end
(),
x
.
begin
());
std
::
copy
(
xsrc
.
begin
(),
xsrc
.
end
(),
x
.
begin
());
...
@@ -481,14 +487,17 @@ void TestSeqPoolKernel() {
...
@@ -481,14 +487,17 @@ void TestSeqPoolKernel() {
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
void
TestMatMulKernel
()
{
void
TestMatMulKernel
()
{
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
VLOG
(
10
)
<<
"===== Test JITKernel "
<<
jit
::
to_string
(
KT
);
auto
last_acc
=
acc
;
// TODO(intel): this should be acc issue of MKL
acc
=
1e-3
;
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
for
(
int
m
:
{
1
,
2
,
3
,
4
})
{
for
(
int
n
:
{
1
,
2
,
3
,
4
})
{
for
(
int
n
:
{
1
,
2
,
3
,
4
})
{
for
(
int
k
:
TestSizes
())
{
for
(
int
k
:
TestSizes
())
{
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
MatMulTuples
<
T
>>
();
auto
ref
=
jit
::
GetRefer
<
KT
,
jit
::
MatMulTuples
<
T
>>
();
EXPECT_TRUE
(
ref
!=
nullptr
);
EXPECT_TRUE
(
ref
!=
nullptr
);
std
::
vector
<
T
>
a
(
m
*
k
),
b
(
k
*
n
),
c
(
m
*
n
);
std
::
vector
<
T
>
a
(
m
*
k
),
b
(
k
*
n
),
c
(
m
*
n
);
RandomVec
<
T
>
(
m
*
k
,
a
.
data
(),
-
0.2
f
,
0.2
f
);
RandomVec
<
T
>
(
m
*
k
,
a
.
data
(),
-
2.
f
,
2.
f
);
RandomVec
<
T
>
(
k
*
n
,
b
.
data
(),
-
0.2
f
,
0.2
f
);
RandomVec
<
T
>
(
k
*
n
,
b
.
data
(),
-
2.
f
,
2.
f
);
const
T
*
a_data
=
a
.
data
();
const
T
*
a_data
=
a
.
data
();
const
T
*
b_data
=
b
.
data
();
const
T
*
b_data
=
b
.
data
();
T
*
c_data
=
c
.
data
();
T
*
c_data
=
c
.
data
();
...
@@ -498,6 +507,7 @@ void TestMatMulKernel() {
...
@@ -498,6 +507,7 @@ void TestMatMulKernel() {
}
}
}
}
}
}
acc
=
last_acc
;
}
}
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
template
<
paddle
::
operators
::
jit
::
KernelType
KT
,
typename
T
,
typename
PlaceType
>
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录