Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2c5c6365
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2c5c6365
编写于
9月 21, 2019
作者:
P
pawelpiotrowicz
提交者:
Tao Luo
9月 21, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add two extra flags for test_analyzer_int8_image_classification to disable fp32/int8 (#19840)
test=develop
上级
cb65439d
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
78 additions
and
50 deletions
+78
-50
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+78
-50
未找到文件。
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
2c5c6365
...
@@ -42,6 +42,8 @@ DEFINE_string(infer_model, "", "model path");
...
@@ -42,6 +42,8 @@ DEFINE_string(infer_model, "", "model path");
// Command-line flags shared by the analyzer/inference test helpers.
DEFINE_string(infer_data, "", "data file");
DEFINE_string(refer_result, "", "reference result for comparison");
DEFINE_int32(batch_size, 1, "batch size");
// The enable_* switches let one test binary skip either precision pass
// entirely (added for test_analyzer_int8_image_classification).
DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
// setting iterations to 0 means processing the whole dataset
DEFINE_int32(iterations, 0, "number of batches to process");
...
@@ -482,68 +484,88 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
...
@@ -482,68 +484,88 @@ void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
<<
std
::
setprecision
(
4
)
<<
avg_acc_int8
;
<<
std
::
setprecision
(
4
)
<<
avg_acc_int8
;
}
}
// Logs throughput (fps) and average latency for one prediction pass.
// `title` labels the pass (e.g. "FP32" or "INT8"); `sample` is the average
// per-sample latency in milliseconds and must be positive.
void SummarizePerformance(const char *title, float sample) {
  CHECK_GT(sample, 0.0);
  // latency is per sample in ms, so 1000/latency is samples per second
  auto throughput = 1000.0 / sample;
  LOG(INFO) << title << ": avg fps: " << std::fixed << std::setw(6)
            << std::setprecision(4) << throughput
            << ", avg latency: " << sample << " ms";
}
void
SummarizePerformance
(
float
sample_latency_fp32
,
void
SummarizePerformance
(
float
sample_latency_fp32
,
float
sample_latency_int8
)
{
float
sample_latency_int8
)
{
// sample latency in ms
if
(
FLAGS_enable_fp32
)
SummarizePerformance
(
"FP32"
,
sample_latency_fp32
);
auto
throughput_fp32
=
1000.0
/
sample_latency_fp32
;
if
(
FLAGS_enable_int8
)
SummarizePerformance
(
"INT8"
,
sample_latency_int8
);
auto
throughput_int8
=
1000.0
/
sample_latency_int8
;
LOG
(
INFO
)
<<
"--- Performance summary --- "
;
LOG
(
INFO
)
<<
"FP32: avg fps: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
throughput_fp32
<<
", avg latency: "
<<
sample_latency_fp32
<<
" ms"
;
LOG
(
INFO
)
<<
"INT8: avg fps: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
throughput_int8
<<
", avg latency: "
<<
sample_latency_int8
<<
" ms"
;
}
}
void
CompareAccuracy
(
float
CompareAccuracyOne
(
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_quant
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_ref
,
int
compared_idx
)
{
int
compared_idx
)
{
if
(
output_slots
_quant
.
size
()
==
0
||
output_slots_ref
.
size
()
==
0
)
if
(
output_slots
.
size
()
==
0
)
throw
std
::
invalid_argument
(
throw
std
::
invalid_argument
(
"CompareAccuracy: output_slots vector is empty."
);
"CompareAccuracy: output_slots vector is empty."
);
float
total_accs_quant
{
0
};
float
total_accs
{
0
};
float
total_accs_ref
{
0
};
for
(
size_t
i
=
0
;
i
<
output_slots_quant
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
output_slots
.
size
();
++
i
)
{
if
(
compared_idx
==
1
)
{
switch
(
compared_idx
)
{
PADDLE_ENFORCE_GE
(
case
1
:
output_slots_quant
[
i
].
size
(),
2UL
,
PADDLE_ENFORCE_GE
(
"To achieve top 1 accuracy, output_slots_quant[i].size()>=2"
);
output_slots
[
i
].
size
(),
2UL
,
PADDLE_ENFORCE_GE
(
"To achieve top 1 accuracy, output_slots_quant[i].size()>=2"
);
output_slots_ref
[
i
].
size
(),
2UL
,
break
;
"To achieve top 1 accuracy, output_slots_ref[i].size()>=2"
);
case
2
:
}
else
if
(
compared_idx
==
2
)
{
PADDLE_ENFORCE_GE
(
PADDLE_ENFORCE_GE
(
output_slots_quant
[
i
].
size
(),
3UL
,
output_slots
[
i
].
size
(),
2UL
,
"To achieve mAP, output_slots_quant[i].size()>=3"
);
"To achieve top 1 accuracy, output_slots_ref[i].size()>=2"
);
PADDLE_ENFORCE_GE
(
output_slots_ref
[
i
].
size
(),
3UL
,
break
;
"To achieve mAP, output_slots_ref[i].size()>=3"
);
default:
}
else
{
throw
std
::
invalid_argument
(
throw
std
::
invalid_argument
(
"CompareAccuracy: compared_idx is out of range."
);
"CompareAccuracy: compared_idx is out of range."
);
}
}
if
(
output_slots_quant
[
i
][
compared_idx
].
lod
.
size
()
>
0
||
if
(
output_slots
[
i
][
compared_idx
].
lod
.
size
()
>
0
)
output_slots_ref
[
i
][
compared_idx
].
lod
.
size
()
>
0
)
throw
std
::
invalid_argument
(
"CompareAccuracy: output has nonempty LoD."
);
throw
std
::
invalid_argument
(
"CompareAccuracy: output has nonempty LoD."
);
if
(
output_slots_quant
[
i
][
compared_idx
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
||
if
(
output_slots
[
i
][
compared_idx
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
)
output_slots_ref
[
i
][
compared_idx
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
)
throw
std
::
invalid_argument
(
throw
std
::
invalid_argument
(
"CompareAccuracy: output is of a wrong type."
);
"CompareAccuracy: output is of a wrong type."
);
total_accs_quant
+=
*
static_cast
<
float
*>
(
output_slots_quant
[
i
][
compared_idx
].
data
.
data
());
total_accs
+=
total_accs_ref
+=
*
static_cast
<
float
*>
(
output_slots
[
i
][
compared_idx
].
data
.
data
());
*
static_cast
<
float
*>
(
output_slots_ref
[
i
][
compared_idx
].
data
.
data
());
}
}
float
avg_acc_quant
=
total_accs_quant
/
output_slots_quant
.
size
();
float
avg_acc_ref
=
total_accs_ref
/
output_slots_ref
.
size
();
CHECK_GT
(
output_slots
.
size
(),
0
);
return
total_accs
/
output_slots
.
size
();
}
// Compares quantized (INT8) accuracy against the reference (FP32) run,
// honoring --enable_fp32/--enable_int8: a disabled pass is neither computed
// nor checked.  When both passes ran, INT8 accuracy may trail FP32 by at
// most FLAGS_quantized_accuracy.
void CompareAccuracy(
    const std::vector<std::vector<PaddleTensor>> &output_slots_quant,
    const std::vector<std::vector<PaddleTensor>> &output_slots_ref,
    int compared_idx) {
  // BUGFIX: the emptiness test was written as
  //   (output_slots_quant.size() == 0 || output_slots_ref.size()) == 0
  // which compares a bool against 0 and almost never throws; each size
  // must be compared to zero individually.
  if ((FLAGS_enable_fp32 && FLAGS_enable_int8) &&
      (output_slots_quant.size() == 0 || output_slots_ref.size() == 0))
    throw std::invalid_argument(
        "CompareAccuracy: output_slots vector is empty.");

  float avg_acc_quant = 0.0;
  float avg_acc_ref = 0.0;

  if (FLAGS_enable_int8)
    avg_acc_quant = CompareAccuracyOne(output_slots_quant, compared_idx);
  if (FLAGS_enable_fp32)
    avg_acc_ref = CompareAccuracyOne(output_slots_ref, compared_idx);

  SummarizeAccuracy(avg_acc_ref, avg_acc_quant, compared_idx);

  // Only validate the passes that actually ran; the comparison against
  // FLAGS_quantized_accuracy is meaningful only when both did.
  if (FLAGS_enable_fp32) CHECK_GT(avg_acc_ref, 0.0);
  if (FLAGS_enable_int8) CHECK_GT(avg_acc_quant, 0.0);
  if (FLAGS_enable_fp32 && FLAGS_enable_int8)
    CHECK_LE(avg_acc_ref - avg_acc_quant, FLAGS_quantized_accuracy);
}
void
CompareDeterministic
(
void
CompareDeterministic
(
...
@@ -591,18 +613,24 @@ void CompareQuantizedAndAnalysis(
...
@@ -591,18 +613,24 @@ void CompareQuantizedAndAnalysis(
PrintConfig
(
cfg
,
true
);
PrintConfig
(
cfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
analysis_outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
analysis_outputs
;
float
sample_latency_fp32
{
-
1
};
float
sample_latency_fp32
{
-
1
};
TestOneThreadPrediction
(
cfg
,
inputs
,
&
analysis_outputs
,
true
,
VarType
::
FP32
,
&
sample_latency_fp32
);
if
(
FLAGS_enable_fp32
)
{
TestOneThreadPrediction
(
cfg
,
inputs
,
&
analysis_outputs
,
true
,
VarType
::
FP32
,
&
sample_latency_fp32
);
}
LOG
(
INFO
)
<<
"--- INT8 prediction start ---"
;
LOG
(
INFO
)
<<
"--- INT8 prediction start ---"
;
auto
*
qcfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
qconfig
);
auto
*
qcfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
qconfig
);
PrintConfig
(
qcfg
,
true
);
PrintConfig
(
qcfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
quantized_outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
quantized_outputs
;
float
sample_latency_int8
{
-
1
};
float
sample_latency_int8
{
-
1
};
TestOneThreadPrediction
(
qcfg
,
inputs
,
&
quantized_outputs
,
true
,
VarType
::
INT8
,
&
sample_latency_int8
);
if
(
FLAGS_enable_int8
)
{
TestOneThreadPrediction
(
qcfg
,
inputs
,
&
quantized_outputs
,
true
,
VarType
::
INT8
,
&
sample_latency_int8
);
}
SummarizePerformance
(
sample_latency_fp32
,
sample_latency_int8
);
SummarizePerformance
(
sample_latency_fp32
,
sample_latency_int8
);
CompareAccuracy
(
quantized_outputs
,
analysis_outputs
,
compared_idx
);
CompareAccuracy
(
quantized_outputs
,
analysis_outputs
,
compared_idx
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录