Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
ca5642c8
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ca5642c8
编写于
6月 16, 2019
作者:
W
Wojciech Uss
提交者:
Tao Luo
6月 16, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
unify FP32 vs. INT8 comparison tests output (#18111)
test=develop
上级
c26130f3
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
91 addition
and
46 deletion
+91
-46
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+46
-13
python/paddle/fluid/contrib/slim/tests/qat_int8_comparison.py
...on/paddle/fluid/contrib/slim/tests/qat_int8_comparison.py
+32
-24
python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
...trib/slim/tests/test_mkldnn_int8_quantization_strategy.py
+13
-9
未找到文件。
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
ca5642c8
...
...
@@ -320,7 +320,8 @@ void PredictionRun(PaddlePredictor *predictor,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
outputs
,
int
num_threads
,
int
tid
,
const
VarType
::
Type
data_type
=
VarType
::
FP32
)
{
const
VarType
::
Type
data_type
=
VarType
::
FP32
,
float
*
sample_latency
=
nullptr
)
{
int
num_times
=
FLAGS_repeat
;
int
iterations
=
inputs
.
size
();
// process the whole dataset ...
if
(
FLAGS_iterations
>
0
&&
...
...
@@ -360,6 +361,10 @@ void PredictionRun(PaddlePredictor *predictor,
auto
batch_latency
=
elapsed_time
/
(
iterations
*
num_times
);
PrintTime
(
FLAGS_batch_size
,
num_times
,
num_threads
,
tid
,
batch_latency
,
iterations
,
data_type
);
if
(
sample_latency
!=
nullptr
)
*
sample_latency
=
batch_latency
/
FLAGS_batch_size
;
if
(
FLAGS_record_benchmark
)
{
Benchmark
benchmark
;
benchmark
.
SetName
(
FLAGS_model_name
);
...
...
@@ -373,12 +378,14 @@ void TestOneThreadPrediction(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
outputs
,
bool
use_analysis
=
true
,
const
VarType
::
Type
data_type
=
VarType
::
FP32
)
{
const
VarType
::
Type
data_type
=
VarType
::
FP32
,
float
*
sample_latency
=
nullptr
)
{
auto
predictor
=
CreateTestPredictor
(
config
,
use_analysis
);
if
(
FLAGS_warmup
)
{
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
outputs
,
1
,
0
,
data_type
);
}
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
1
,
0
,
data_type
);
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
1
,
0
,
data_type
,
sample_latency
);
}
void
TestMultiThreadPrediction
(
...
...
@@ -430,6 +437,31 @@ void TestPrediction(const PaddlePredictor::Config *config,
}
}
void
SummarizeAccuracy
(
float
avg_acc1_fp32
,
float
avg_acc1_int8
)
{
LOG
(
INFO
)
<<
"--- Accuracy summary --- "
;
LOG
(
INFO
)
<<
"Accepted top1 accuracy drop threshold: "
<<
FLAGS_quantized_accuracy
<<
". (condition: (FP32_top1_acc - INT8_top1_acc) <= threshold)"
;
LOG
(
INFO
)
<<
"FP32: avg top1 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_fp32
;
LOG
(
INFO
)
<<
"INT8: avg top1 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_int8
;
}
void
SummarizePerformance
(
float
sample_latency_fp32
,
float
sample_latency_int8
)
{
// sample latency in ms
auto
throughput_fp32
=
1000.0
/
sample_latency_fp32
;
auto
throughput_int8
=
1000.0
/
sample_latency_int8
;
LOG
(
INFO
)
<<
"--- Performance summary --- "
;
LOG
(
INFO
)
<<
"FP32: avg fps: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
throughput_fp32
<<
", avg latency: "
<<
sample_latency_fp32
<<
" ms"
;
LOG
(
INFO
)
<<
"INT8: avg fps: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
throughput_int8
<<
", avg latency: "
<<
sample_latency_int8
<<
" ms"
;
}
void
CompareTopAccuracy
(
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_quant
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_ref
)
{
...
...
@@ -459,12 +491,10 @@ void CompareTopAccuracy(
float
avg_acc1_quant
=
total_accs1_quant
/
output_slots_quant
.
size
();
float
avg_acc1_ref
=
total_accs1_ref
/
output_slots_ref
.
size
();
LOG
(
INFO
)
<<
"Avg top1 INT8 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_quant
;
LOG
(
INFO
)
<<
"Avg top1 FP32 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_ref
;
LOG
(
INFO
)
<<
"Accepted accuracy drop threshold: "
<<
FLAGS_quantized_accuracy
;
CHECK_LE
(
std
::
abs
(
avg_acc1_quant
-
avg_acc1_ref
),
FLAGS_quantized_accuracy
);
SummarizeAccuracy
(
avg_acc1_ref
,
avg_acc1_quant
);
CHECK_GT
(
avg_acc1_ref
,
0.0
);
CHECK_GT
(
avg_acc1_quant
,
0.0
);
CHECK_LE
(
avg_acc1_ref
-
avg_acc1_quant
,
FLAGS_quantized_accuracy
);
}
void
CompareDeterministic
(
...
...
@@ -510,16 +540,19 @@ void CompareQuantizedAndAnalysis(
auto
*
cfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
config
);
PrintConfig
(
cfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
analysis_outputs
;
TestOneThreadPrediction
(
cfg
,
inputs
,
&
analysis_outputs
,
true
,
VarType
::
FP32
);
float
sample_latency_fp32
{
-
1
};
TestOneThreadPrediction
(
cfg
,
inputs
,
&
analysis_outputs
,
true
,
VarType
::
FP32
,
&
sample_latency_fp32
);
LOG
(
INFO
)
<<
"--- INT8 prediction start ---"
;
auto
*
qcfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
qconfig
);
PrintConfig
(
qcfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
quantized_outputs
;
TestOneThreadPrediction
(
qcfg
,
inputs
,
&
quantized_outputs
,
true
,
VarType
::
INT8
);
float
sample_latency_int8
{
-
1
};
TestOneThreadPrediction
(
qcfg
,
inputs
,
&
quantized_outputs
,
true
,
VarType
::
INT8
,
&
sample_latency_int8
);
LOG
(
INFO
)
<<
"--- comparing outputs --- "
;
SummarizePerformance
(
sample_latency_fp32
,
sample_latency_int8
)
;
CompareTopAccuracy
(
quantized_outputs
,
analysis_outputs
);
}
...
...
python/paddle/fluid/contrib/slim/tests/qat_int8_comparison.py
浏览文件 @
ca5642c8
...
...
@@ -83,8 +83,8 @@ class TestQatInt8Comparison(unittest.TestCase):
while
step
<
num
:
fp
.
seek
(
imgs_offset
+
img_size
*
step
)
img
=
fp
.
read
(
img_size
)
img
=
struct
.
unpack_from
(
'{}f'
.
format
(
img_ch
*
img_w
*
img_h
),
img
)
img
=
struct
.
unpack_from
(
'{}f'
.
format
(
img_ch
*
img_w
*
img_h
),
img
)
img
=
np
.
array
(
img
)
img
.
shape
=
(
img_ch
,
img_w
,
img_h
)
fp
.
seek
(
labels_offset
+
label_size
*
step
)
...
...
@@ -147,6 +147,7 @@ class TestQatInt8Comparison(unittest.TestCase):
def
_predict
(
self
,
test_reader
=
None
,
model_path
=
None
,
batch_size
=
1
,
batch_num
=
1
,
skip_batch_num
=
0
,
transform_to_int8
=
False
):
...
...
@@ -199,7 +200,7 @@ class TestQatInt8Comparison(unittest.TestCase):
out
=
exe
.
run
(
inference_program
,
feed
=
{
feed_target_names
[
0
]:
images
},
fetch_list
=
fetch_targets
)
batch_time
=
time
.
time
()
-
start
batch_time
=
(
time
.
time
()
-
start
)
*
1000
# in miliseconds
outputs
.
append
(
out
[
0
])
batch_acc1
,
batch_acc5
=
self
.
_get_batch_accuracy
(
out
[
0
],
labels
)
...
...
@@ -212,14 +213,15 @@ class TestQatInt8Comparison(unittest.TestCase):
fpses
.
append
(
fps
)
iters
+=
1
appx
=
' (warm-up)'
if
iters
<=
skip_batch_num
else
''
_logger
.
info
(
'batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, '
'batch latency: {3:.4f} s, batch fps: {4:.2f}'
.
format
(
iters
,
batch_acc1
,
batch_acc5
,
batch_tim
e
,
fps
,
appx
))
_logger
.
info
(
'batch {0}{5}, acc1: {1:.4f}, acc5: {2:.4f}, '
'latency: {3:.4f} ms, fps: {4:.2f}'
.
format
(
iters
,
batch_acc1
,
batch_acc5
,
batch_time
/
batch_siz
e
,
fps
,
appx
))
# Postprocess benchmark data
latencies
=
batch_times
[
skip_batch_num
:]
latency_avg
=
np
.
average
(
latencies
)
batch_latencies
=
batch_times
[
skip_batch_num
:]
batch_latency_avg
=
np
.
average
(
batch_latencies
)
latency_avg
=
batch_latency_avg
/
batch_size
fpses
=
fpses
[
skip_batch_num
:]
fps_avg
=
np
.
average
(
fpses
)
infer_total_time
=
time
.
time
()
-
infer_start_time
...
...
@@ -230,13 +232,25 @@ class TestQatInt8Comparison(unittest.TestCase):
return
outputs
,
acc1_avg
,
acc5_avg
,
fps_avg
,
latency_avg
def
_summarize_performance
(
self
,
fp32_fps
,
fp32_lat
,
int8_fps
,
int8_lat
):
_logger
.
info
(
'--- Performance summary ---'
)
_logger
.
info
(
'FP32: avg fps: {0:.2f}, avg latency: {1:.4f} ms'
.
format
(
fp32_fps
,
fp32_lat
))
_logger
.
info
(
'INT8: avg fps: {0:.2f}, avg latency: {1:.4f} ms'
.
format
(
int8_fps
,
int8_lat
))
def
_compare_accuracy
(
self
,
fp32_acc1
,
fp32_acc5
,
int8_acc1
,
int8_acc5
,
threshold
):
_logger
.
info
(
'Accepted acc1 diff threshold: {0}'
.
format
(
threshold
))
_logger
.
info
(
'FP32: avg acc1: {0:.4f}, avg acc5: {1:.4f}'
.
format
(
fp32_acc1
,
fp32_acc5
))
_logger
.
info
(
'INT8: avg acc1: {0:.4f}, avg acc5: {1:.4f}'
.
format
(
int8_acc1
,
int8_acc5
))
_logger
.
info
(
'--- Accuracy summary ---'
)
_logger
.
info
(
'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - IN8_top1_acc) <= threshold)'
.
format
(
threshold
))
_logger
.
info
(
'FP32: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'
.
format
(
fp32_acc1
,
fp32_acc5
))
_logger
.
info
(
'INT8: avg top1 accuracy: {0:.4f}, avg top5 accuracy: {1:.4f}'
.
format
(
int8_acc1
,
int8_acc5
))
assert
fp32_acc1
>
0.0
assert
int8_acc1
>
0.0
assert
fp32_acc1
-
int8_acc1
<=
threshold
...
...
@@ -257,9 +271,7 @@ class TestQatInt8Comparison(unittest.TestCase):
_logger
.
info
(
'Dataset: {0}'
.
format
(
data_path
))
_logger
.
info
(
'Batch size: {0}'
.
format
(
batch_size
))
_logger
.
info
(
'Batch number: {0}'
.
format
(
batch_num
))
_logger
.
info
(
'Accuracy diff threshold: {0}. '
'(condition: (fp32_acc - int8_acc) <= threshold)'
.
format
(
acc_diff_threshold
))
_logger
.
info
(
'Accuracy drop threshold: {0}.'
.
format
(
acc_diff_threshold
))
_logger
.
info
(
'--- QAT FP32 prediction start ---'
)
val_reader
=
paddle
.
batch
(
...
...
@@ -267,6 +279,7 @@ class TestQatInt8Comparison(unittest.TestCase):
fp32_output
,
fp32_acc1
,
fp32_acc5
,
fp32_fps
,
fp32_lat
=
self
.
_predict
(
val_reader
,
qat_model_path
,
batch_size
,
batch_num
,
skip_batch_num
,
transform_to_int8
=
False
)
...
...
@@ -277,17 +290,12 @@ class TestQatInt8Comparison(unittest.TestCase):
int8_output
,
int8_acc1
,
int8_acc5
,
int8_fps
,
int8_lat
=
self
.
_predict
(
val_reader
,
qat_model_path
,
batch_size
,
batch_num
,
skip_batch_num
,
transform_to_int8
=
True
)
_logger
.
info
(
'--- Performance summary ---'
)
_logger
.
info
(
'FP32: avg fps: {0:.2f}, avg latency: {1:.4f} s'
.
format
(
fp32_fps
,
fp32_lat
))
_logger
.
info
(
'INT8: avg fps: {0:.2f}, avg latency: {1:.4f} s'
.
format
(
int8_fps
,
int8_lat
))
_logger
.
info
(
'--- Comparing accuracy ---'
)
self
.
_summarize_performance
(
fp32_fps
,
fp32_lat
,
int8_fps
,
int8_lat
)
self
.
_compare_accuracy
(
fp32_acc1
,
fp32_acc5
,
int8_acc1
,
int8_acc5
,
acc_diff_threshold
)
...
...
python/paddle/fluid/contrib/slim/tests/test_mkldnn_int8_quantization_strategy.py
浏览文件 @
ca5642c8
...
...
@@ -172,6 +172,17 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
com_pass
.
config
(
config_path
)
com_pass
.
run
()
def
_compare_accuracy
(
self
,
fp32_acc1
,
int8_acc1
,
threshold
):
_logger
.
info
(
'--- Accuracy summary ---'
)
_logger
.
info
(
'Accepted top1 accuracy drop threshold: {0}. (condition: (FP32_top1_acc - IN8_top1_acc) <= threshold)'
.
format
(
threshold
))
_logger
.
info
(
'FP32: avg top1 accuracy: {0:.4f}'
.
format
(
fp32_acc1
))
_logger
.
info
(
'INT8: avg top1 accuracy: {0:.4f}'
.
format
(
int8_acc1
))
assert
fp32_acc1
>
0.0
assert
int8_acc1
>
0.0
assert
fp32_acc1
-
int8_acc1
<=
threshold
def
test_compression
(
self
):
if
not
fluid
.
core
.
is_compiled_with_mkldnn
():
return
...
...
@@ -204,15 +215,8 @@ class TestMKLDNNPostTrainingQuantStrategy(unittest.TestCase):
self
.
_reader_creator
(
data_path
,
False
),
batch_size
=
batch_size
)
fp32_model_result
=
self
.
_predict
(
val_reader
,
fp32_model_path
)
_logger
.
info
(
'--- comparing outputs ---'
)
_logger
.
info
(
'Avg top1 INT8 accuracy: {0:.4f}'
.
format
(
int8_model_result
[
0
]))
_logger
.
info
(
'Avg top1 FP32 accuracy: {0:.4f}'
.
format
(
fp32_model_result
[
0
]))
_logger
.
info
(
'Accepted accuracy drop threshold: {0}'
.
format
(
accuracy_diff_threshold
))
assert
fp32_model_result
[
0
]
-
int8_model_result
[
0
]
<=
accuracy_diff_threshold
self
.
_compare_accuracy
(
fp32_model_result
[
0
],
int8_model_result
[
0
],
accuracy_diff_threshold
)
if
__name__
==
'__main__'
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录