PaddlePaddle / Paddle
Commit 06545fcf

Authored by Wojciech Uss on Nov 29, 2019; committed by Tao Luo on Nov 29, 2019.
Fp32 vs int8 qat C++ performance (#21244) (#21432)
Parent: 072eb5b6
Showing 8 changed files with 399 additions and 41 deletions (+399, -41).
paddle/fluid/inference/tests/api/CMakeLists.txt                                  +39   -4
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc    +16   -13
paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc        +4    -4
paddle/fluid/inference/tests/api/analyzer_qat_image_classification_tester.cc     +129  -0
paddle/fluid/inference/tests/api/tester_helper.h                                 +100  -20
python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py        +11   -0
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt                            +13   -0
python/paddle/fluid/contrib/slim/tests/save_qat_model.py                         +87   -0
paddle/fluid/inference/tests/api/CMakeLists.txt

@@ -16,6 +16,12 @@ function(download_int8_data install_dir data_file)
   endif()
 endfunction()
 
+function(download_qat_data install_dir data_file)
+  if (NOT EXISTS ${install_dir}/${data_file})
+    inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/int8/QAT_models ${data_file})
+  endif()
+endfunction()
+
 function(download_model_and_data install_dir model_name data_name)
   download_data(${install_dir} ${model_name})
   download_data(${install_dir} ${data_name})

@@ -27,7 +33,7 @@ function(inference_analysis_api_test target install_dir filename)
   ARGS --infer_model=${install_dir}/model --infer_data=${install_dir}/data.txt)
 endfunction()
 
-function(inference_analysis_api_int8_test_build TARGET_NAME filename)
+function(inference_analysis_api_test_build TARGET_NAME filename)
   inference_analysis_test_build(${TARGET_NAME} SRCS ${filename}
       EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} benchmark)
 endfunction()

@@ -73,6 +79,18 @@ function(inference_analysis_api_test_with_refer_result target install_dir filena
       --refer_result=${install_dir}/result.txt)
 endfunction()
 
+function(inference_analysis_api_qat_test_run TARGET_NAME test_binary fp32_model_dir int8_model_dir data_path)
+  inference_analysis_test_run(${TARGET_NAME}
+      COMMAND ${test_binary}
+      ARGS --fp32_model=${fp32_model_dir}
+           --int8_model=${int8_model_dir}
+           --infer_data=${data_path}
+           --batch_size=50
+           --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
+           --with_accuracy_layer=false
+           --iterations=2)
+endfunction()
+
 if(NOT APPLE AND WITH_MKLML)
   # RNN1
   set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1")

@@ -186,9 +204,10 @@ download_data(${MOBILENET_MODEL_DIR} "mobilenet_model.tar.gz")
 inference_analysis_api_test_with_fake_data_run(test_analyzer_mobilenet_depthwise_conv
     ${IMG_CLASS_TEST_APP} ${MOBILENET_MODEL_DIR} false)
 
-### INT8 tests
 if(WITH_MKLDNN)
+  ### INT8 tests
   set(INT8_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
 
+  ### Image classification tests

@@ -200,7 +219,7 @@ if(WITH_MKLDNN)
   download_int8_data(${INT8_DATA_DIR} "imagenet_val_100_tail.tar.gz")
 
   # build test binary to be used in subsequent tests
-  inference_analysis_api_int8_test_build(${INT8_IMG_CLASS_TEST_APP} ${INT8_IMG_CLASS_TEST_APP_SRC})
+  inference_analysis_api_test_build(${INT8_IMG_CLASS_TEST_APP} ${INT8_IMG_CLASS_TEST_APP_SRC})
 
   # resnet50 int8
   set(INT8_RESNET50_MODEL_DIR "${INT8_DATA_DIR}/resnet50")

@@ -249,13 +268,29 @@ if(WITH_MKLDNN)
   download_int8_data(${INT8_DATA_DIR} "pascalvoc_small.tar.gz")
 
   # build test binary to be used in subsequent tests
-  inference_analysis_api_int8_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC})
+  inference_analysis_api_test_build(${INT8_OBJ_DETECT_TEST_APP} ${INT8_OBJ_DETECT_TEST_APP_SRC})
 
   # mobilenet-ssd int8
   set(INT8_MOBILENET_SSD_MODEL_DIR "${INT8_DATA_DIR}/mobilenet-ssd")
   download_int8_data(${INT8_MOBILENET_SSD_MODEL_DIR} "mobilenet_ssd_int8_model.tar.gz")
   inference_analysis_api_object_dection_int8_test_run(test_analyzer_int8_mobilenet_ssd ${INT8_OBJ_DETECT_TEST_APP} ${INT8_MOBILENET_SSD_MODEL_DIR} ${PASCALVOC_DATA_PATH})
 
+  ### optimized FP32 vs. QAT INT8 tests
+  set(QAT_DATA_DIR "${INFERENCE_DEMO_INSTALL_DIR}/int8v2")
+  set(QAT_IMG_CLASS_TEST_APP "test_analyzer_qat_image_classification")
+  set(QAT_IMG_CLASS_TEST_APP_SRC "analyzer_qat_image_classification_tester.cc")
+
+  # build test binary to be used in subsequent tests
+  inference_analysis_api_test_build(${QAT_IMG_CLASS_TEST_APP} ${QAT_IMG_CLASS_TEST_APP_SRC})
+
+  # ResNet50 FP32 vs. QAT INT8
+  set(QAT2_RESNET50_MODEL_DIR "${QAT_DATA_DIR}/ResNet50_qat_perf")
+  download_qat_data(${QAT2_RESNET50_MODEL_DIR} "ResNet50_qat_perf.tar.gz")
+  set(QAT2_INT8_RESNET50_MODEL_DIR "${QAT_DATA_DIR}/ResNet50_qat_perf_int8")
+  download_qat_data(${QAT2_INT8_RESNET50_MODEL_DIR} "ResNet50_qat_perf_int8.tar.gz")
+  inference_analysis_api_qat_test_run(test_analyzer_qat_performance_benchmark
+      ${QAT_IMG_CLASS_TEST_APP} ${QAT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float
+      ${QAT2_INT8_RESNET50_MODEL_DIR}/ResNet50_qat_perf_int8 ${IMAGENET_DATA_PATH})
+
 endif()
 
 # bert, max_len=20, embedding_dim=128
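The new helpers make it cheap to wire up further FP32 vs. INT8 QAT benchmarks. As a hedged sketch (the MobileNetV1 target, archive name, and model paths below are hypothetical, not part of this commit; the helpers and variables are):

# Hypothetical wiring of a second QAT benchmark. Only download_qat_data,
# inference_analysis_api_qat_test_run, QAT_DATA_DIR, QAT_IMG_CLASS_TEST_APP
# and IMAGENET_DATA_PATH come from this commit; the model names are placeholders.
set(QAT2_MOBILENET_MODEL_DIR "${QAT_DATA_DIR}/MobileNetV1_qat_perf")
download_qat_data(${QAT2_MOBILENET_MODEL_DIR} "MobileNetV1_qat_perf.tar.gz")
inference_analysis_api_qat_test_run(test_analyzer_qat_mobilenet_benchmark
    ${QAT_IMG_CLASS_TEST_APP}
    ${QAT2_MOBILENET_MODEL_DIR}/float
    ${QAT2_MOBILENET_MODEL_DIR}/int8
    ${IMAGENET_DATA_PATH})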
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc

@@ -35,8 +35,8 @@ class TensorReader {
  public:
   TensorReader(std::ifstream &file, size_t beginning_offset,
                std::vector<int> shape, std::string name)
-      : file_(file), position(beginning_offset), shape_(shape), name_(name) {
-    numel = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
+      : file_(file), position_(beginning_offset), shape_(shape), name_(name) {
+    numel_ = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
                             std::multiplies<size_t>());
   }

@@ -45,11 +45,11 @@ class TensorReader {
     tensor.name = name_;
     tensor.shape = shape_;
     tensor.dtype = GetPaddleDType<T>();
-    tensor.data.Resize(numel * sizeof(T));
+    tensor.data.Resize(numel_ * sizeof(T));
 
-    file_.seekg(position);
-    file_.read(static_cast<char *>(tensor.data.data()), numel * sizeof(T));
-    position = file_.tellg();
+    file_.seekg(position_);
+    file_.read(static_cast<char *>(tensor.data.data()), numel_ * sizeof(T));
+    position_ = file_.tellg();
 
     if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream";
     if (file_.fail())

@@ -60,10 +60,10 @@ class TensorReader {
  protected:
   std::ifstream &file_;
-  size_t position;
+  size_t position_;
   std::vector<int> shape_;
   std::string name_;
-  size_t numel;
+  size_t numel_;
 };
 
 std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(

@@ -71,10 +71,13 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
     int num_images = FLAGS_warmup_batch_size) {
   int test_data_batch_size = test_data[0][0].shape[0];
   auto iterations = test_data.size();
-  PADDLE_ENFORCE(
-      static_cast<size_t>(num_images) <= iterations * test_data_batch_size,
-      "The requested quantization warmup data size " +
-          std::to_string(num_images) + " is bigger than all test data size.");
+  auto all_test_data_size = iterations * test_data_batch_size;
+  PADDLE_ENFORCE_LE(static_cast<size_t>(num_images), all_test_data_size,
+                    platform::errors::InvalidArgument(
+                        "The requested quantization warmup data size must be "
+                        "smaller than the test data size. But received warmup "
+                        "size is %d and test data size is %d",
+                        num_images, all_test_data_size));
 
   PaddleTensor images;
   images.name = "image";
paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc

@@ -50,7 +50,7 @@ template <typename T>
 class TensorReader {
  public:
   TensorReader(std::ifstream &file, size_t beginning_offset, std::string name)
-      : file_(file), position(beginning_offset), name_(name) {}
+      : file_(file), position_(beginning_offset), name_(name) {}
 
   PaddleTensor NextBatch(std::vector<int> shape, std::vector<size_t> lod) {
     int numel =

@@ -64,9 +64,9 @@ class TensorReader {
       tensor.lod.clear();
       tensor.lod.push_back(lod);
     }
-    file_.seekg(position);
+    file_.seekg(position_);
     file_.read(reinterpret_cast<char *>(tensor.data.data()), numel * sizeof(T));
-    position = file_.tellg();
+    position_ = file_.tellg();
 
     if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream";
     if (file_.fail())
       throw std::runtime_error(name_ + ": failed reading file.");

@@ -75,7 +75,7 @@ class TensorReader {
  protected:
   std::ifstream &file_;
-  size_t position;
+  size_t position_;
   std::string name_;
 };
paddle/fluid/inference/tests/api/analyzer_qat_image_classification_tester.cc (new file, 0 → 100644)

/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {
namespace analysis {

void SetConfig(AnalysisConfig *cfg, std::string model_path) {
  cfg->SetModel(model_path);
  cfg->DisableGpu();
  cfg->SwitchIrOptim(false);
  cfg->SwitchSpecifyInputNames();
  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
  cfg->EnableMKLDNN();
}

template <typename T>
class TensorReader {
 public:
  TensorReader(std::ifstream &file, size_t beginning_offset,
               std::vector<int> shape, std::string name)
      : file_(file), position_(beginning_offset), shape_(shape), name_(name) {
    numel_ = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
                             std::multiplies<size_t>());
  }

  PaddleTensor NextBatch() {
    PaddleTensor tensor;
    tensor.name = name_;
    tensor.shape = shape_;
    tensor.dtype = GetPaddleDType<T>();
    tensor.data.Resize(numel_ * sizeof(T));

    file_.seekg(position_);
    file_.read(static_cast<char *>(tensor.data.data()), numel_ * sizeof(T));
    position_ = file_.tellg();

    if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream";
    if (file_.fail())
      throw std::runtime_error(name_ + ": failed reading file.");

    return tensor;
  }

 protected:
  std::ifstream &file_;
  size_t position_;
  std::vector<int> shape_;
  std::string name_;
  size_t numel_;
};

void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
              bool with_accuracy_layer = FLAGS_with_accuracy_layer,
              int32_t batch_size = FLAGS_batch_size) {
  std::ifstream file(FLAGS_infer_data, std::ios::binary);
  if (!file) {
    FAIL() << "Couldn't open file: " << FLAGS_infer_data;
  }

  int64_t total_images{0};
  file.read(reinterpret_cast<char *>(&total_images), sizeof(total_images));
  LOG(INFO) << "Total images in file: " << total_images;

  std::vector<int> image_batch_shape{batch_size, 3, 224, 224};
  std::vector<int> label_batch_shape{batch_size, 1};
  auto images_offset_in_file = static_cast<size_t>(file.tellg());

  TensorReader<float> image_reader(file, images_offset_in_file,
                                   image_batch_shape, "image");

  auto iterations_max = total_images / batch_size;
  auto iterations = iterations_max;
  if (FLAGS_iterations > 0 && FLAGS_iterations < iterations_max) {
    iterations = FLAGS_iterations;
  }

  auto labels_offset_in_file =
      images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;

  TensorReader<int64_t> label_reader(file, labels_offset_in_file,
                                     label_batch_shape, "label");
  for (auto i = 0; i < iterations; i++) {
    auto images = image_reader.NextBatch();
    std::vector<PaddleTensor> tmp_vec;
    tmp_vec.push_back(std::move(images));
    if (with_accuracy_layer) {
      auto labels = label_reader.NextBatch();
      tmp_vec.push_back(std::move(labels));
    }
    inputs->push_back(std::move(tmp_vec));
  }
}

TEST(Analyzer_qat_image_classification, quantization) {
  AnalysisConfig fp32_cfg;
  SetConfig(&fp32_cfg, FLAGS_fp32_model);

  AnalysisConfig int8_cfg;
  SetConfig(&int8_cfg, FLAGS_int8_model);

  // read data from file and prepare batches with test data
  std::vector<std::vector<PaddleTensor>> input_slots_all;
  SetInput(&input_slots_all);

  // 0 is avg_cost, 1 is top1_accuracy, 2 is top5_accuracy or mAP
  CompareAnalysisAndAnalysis(&fp32_cfg, &int8_cfg, input_slots_all,
                             FLAGS_with_accuracy_layer, 1);
}

}  // namespace analysis
}  // namespace inference
}  // namespace paddle
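SetInput above assumes a flat binary layout: a leading int64 image count, then all images as contiguous float32 in 3x224x224 NCHW order, then all labels as int64. A minimal Python sketch of a file in that layout (illustrative only; the real data files are downloaded by the CMake rules above, and the file name and count here are placeholders):

# Illustrative writer for the binary layout consumed by SetInput():
#   int64 total_images | float32 images [N, 3, 224, 224] | int64 labels [N, 1]
import numpy as np

n = 4  # tiny example; real files hold a full validation set
images = np.random.rand(n, 3, 224, 224).astype(np.float32)
labels = np.random.randint(0, 1000, size=(n, 1)).astype(np.int64)

with open("fake_imagenet.bin", "wb") as f:
    f.write(np.int64(n).tobytes())  # leading image-count header
    f.write(images.tobytes())       # all images, contiguous NCHW float32
    f.write(labels.tobytes())       # all labels, contiguous int64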
paddle/fluid/inference/tests/api/tester_helper.h

@@ -39,9 +39,13 @@
 DEFINE_string(model_name, "", "model name");
 DEFINE_string(infer_model, "", "model path");
+DEFINE_string(fp32_model, "", "FP32 model path");
+DEFINE_string(int8_model, "", "INT8 model path");
 DEFINE_string(infer_data, "", "data file");
 DEFINE_string(refer_result, "", "reference result for comparison");
 DEFINE_int32(batch_size, 1, "batch size");
 DEFINE_bool(with_accuracy_layer, true,
             "Calculate the accuracy while label is in the input");
+DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
+DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
 DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");

@@ -238,7 +242,11 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
                        const std::vector<std::string> *feed_names = nullptr,
                        const int continuous_inuput_index = 0) {
   // Set fake_image_data
-  PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0, "Only have single batch of data.");
+  PADDLE_ENFORCE_EQ(FLAGS_test_all_data, 0,
+                    platform::errors::InvalidArgument(
+                        "In SetFakeImageInput, expected test_all_data = false, "
+                        "but now test_all_data=",
+                        FLAGS_test_all_data));
   std::vector<std::vector<int64_t>> feed_target_shapes = GetFeedTargetShapes(
       dirname, is_combined, model_filename, params_filename);
   std::ostringstream os;

@@ -251,7 +259,13 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
   }
   LOG(INFO) << os.str();
   if (feed_names) {
-    PADDLE_ENFORCE_EQ(feed_names->size(), feed_target_shapes.size());
+    PADDLE_ENFORCE_EQ(
+        feed_names->size(), feed_target_shapes.size(),
+        platform::errors::InvalidArgument(
+            "The size of feeds_names and size of "
+            "feed_target_shapes must be equal, but now feeds_names "
+            "size is %d and feed_target_shapes size is %d",
+            feed_names->size(), feed_target_shapes.size()));
   }
   std::vector<PaddleTensor> input_slots(feed_target_shapes.size());
   for (size_t i = 0; i < feed_target_shapes.size(); ++i) {

@@ -466,12 +480,20 @@ void TestPrediction(const PaddlePredictor::Config *config,
 void SummarizeAccuracy(float avg_acc_fp32, float avg_acc_int8,
                        int compared_idx) {
-  PADDLE_ENFORCE_LE(compared_idx, 2,
-                    "Compare either top1 accuracy or mAP (top5), the "
-                    "compared_idx is out of range");
-  PADDLE_ENFORCE_GE(compared_idx, 1,
-                    "Compare either top1 accuracy or mAP (top5), the "
-                    "compared_idx is out of range");
+  PADDLE_ENFORCE_LE(
+      compared_idx, 2,
+      platform::errors::InvalidArgument(
+          "The compared_idx should be <= 2. But received compared_idx = %d. "
+          "For top1 accuracy, set compared_idx = 1; For top5 accuracy or mean "
+          "Average Precision (mAP), set compared_idx = 2.",
+          compared_idx));
+  PADDLE_ENFORCE_GE(
+      compared_idx, 1,
+      platform::errors::InvalidArgument(
+          "The compared_idx should be >= 1. But received compared_idx = %d. "
+          "For top1 accuracy, set compared_idx = 1; For top5 accuracy or mean "
+          "Average Precision (mAP), set compared_idx = 2.",
+          compared_idx));
   std::string prefix = (compared_idx == 1) ? "top1_accuracy " : "mAP ";
   LOG(INFO) << "--- Accuracy summary --- ";
   LOG(INFO) << "Accepted " << prefix

@@ -501,9 +523,10 @@ void SummarizePerformance(float sample_latency_fp32,
 float CompareAccuracyOne(
     const std::vector<std::vector<PaddleTensor>> &output_slots,
     int compared_idx) {
-  if (output_slots.size() == 0)
-    throw std::invalid_argument(
-        "CompareAccuracy: output_slots vector is empty.");
+  PADDLE_ENFORCE_GT(output_slots.size(), 0,
+                    platform::errors::InvalidArgument(
+                        "The accuracy vector is empty. The accuracy vector "
+                        "size should be bigger than 0"));
 
   float total_accs{0};

@@ -512,12 +535,19 @@ float CompareAccuracyOne(
     case 1:
       PADDLE_ENFORCE_GE(
           output_slots[i].size(), 2UL,
-          "To achieve top 1 accuracy, output_slots_quant[i].size()>=2");
+          platform::errors::InvalidArgument(
+              "To achieve top 1 accuracy, output_slots size "
+              "must be bigger than or equal to 2, but now the size is %d",
+              output_slots[i].size()));
       break;
     case 2:
       PADDLE_ENFORCE_GE(
-          output_slots[i].size(), 2UL,
-          "To achieve top 1 accuracy, output_slots_ref[i].size()>=2");
+          output_slots[i].size(), 3UL,
+          platform::errors::InvalidArgument(
+              "To achieve top 5 accuracy or mean Average "
+              "Precision (mAP), output_slots size must be "
+              "bigger than or equal to 3, but now the size is %d",
+              output_slots[i].size()));
       break;
     default:
       throw std::invalid_argument(

@@ -535,8 +565,6 @@ float CompareAccuracyOne(
         *static_cast<float *>(output_slots[i][compared_idx].data.data());
   }
 
-  CHECK_GT(output_slots.size(), 0);
-
   return total_accs / output_slots.size();
 }

@@ -594,8 +622,14 @@ void CompareNativeAndAnalysis(
   std::vector<std::vector<PaddleTensor>> native_outputs, analysis_outputs;
   TestOneThreadPrediction(config, inputs, &native_outputs, false);
   TestOneThreadPrediction(config, inputs, &analysis_outputs, true);
-  PADDLE_ENFORCE_GT(native_outputs.size(), 0, "Native output is empty.");
-  PADDLE_ENFORCE_GT(analysis_outputs.size(), 0, "Analysis output is empty.");
+  PADDLE_ENFORCE_GT(native_outputs.size(), 0,
+                    platform::errors::InvalidArgument(
+                        "The native outputs is empty. The native outputs "
+                        "vector size must be bigger than 0"));
+  PADDLE_ENFORCE_GT(analysis_outputs.size(), 0,
+                    platform::errors::InvalidArgument(
+                        "The analysis outputs is empty. The analysis outputs "
+                        "vector size must be bigger than 0"));
   CompareResult(analysis_outputs.back(), native_outputs.back());
 }

@@ -603,8 +637,12 @@ void CompareQuantizedAndAnalysis(
     const AnalysisConfig *config, const AnalysisConfig *qconfig,
     const std::vector<std::vector<PaddleTensor>> &inputs,
     const int compared_idx = 1) {
-  PADDLE_ENFORCE_EQ(inputs[0][0].shape[0], FLAGS_batch_size,
-                    "Input data has to be packed batch by batch.");
+  PADDLE_ENFORCE_EQ(
+      inputs[0][0].shape[0], FLAGS_batch_size,
+      platform::errors::InvalidArgument(
+          "Input data has to be packed batch by batch. The batchsize is set to "
+          "%d, but the real input is packed with batchsize = %d",
+          FLAGS_batch_size, inputs[0][0].shape[0]));
   LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size
             << ", warmup batch size " << FLAGS_warmup_batch_size << ".";

@@ -634,6 +672,48 @@ void CompareQuantizedAndAnalysis(
   CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
 }
 
+void CompareAnalysisAndAnalysis(
+    const AnalysisConfig *config1, const AnalysisConfig *config2,
+    const std::vector<std::vector<PaddleTensor>> &inputs,
+    const bool with_accuracy_layer = FLAGS_with_accuracy_layer,
+    const int compared_idx = 1) {
+  PADDLE_ENFORCE_EQ(
+      inputs[0][0].shape[0], FLAGS_batch_size,
+      platform::errors::InvalidArgument(
+          "Input data has to be packed batch by batch. The batchsize is set to "
+          "%d, but the real input is packed with batchsize = %d",
+          FLAGS_batch_size, inputs[0][0].shape[0]));
+
+  LOG(INFO) << "FP32 & INT8 prediction run: batch_size " << FLAGS_batch_size
+            << ", warmup batch size " << FLAGS_warmup_batch_size << ".";
+
+  LOG(INFO) << "--- FP32 prediction start ---";
+  auto *cfg1 = reinterpret_cast<const PaddlePredictor::Config *>(config1);
+  PrintConfig(cfg1, true);
+  std::vector<std::vector<PaddleTensor>> analysis_outputs;
+  float sample_latency_fp32{-1};
+  if (FLAGS_enable_fp32) {
+    TestOneThreadPrediction(cfg1, inputs, &analysis_outputs, true,
+                            VarType::FP32, &sample_latency_fp32);
+  }
+
+  LOG(INFO) << "--- INT8 prediction start ---";
+  auto *cfg2 = reinterpret_cast<const PaddlePredictor::Config *>(config2);
+  PrintConfig(cfg2, true);
+  std::vector<std::vector<PaddleTensor>> int8_outputs;
+  float sample_latency_int8{-1};
+  if (FLAGS_enable_int8) {
+    TestOneThreadPrediction(cfg2, inputs, &int8_outputs, true,
+                            VarType::INT8, &sample_latency_int8);
+  }
+  SummarizePerformance(sample_latency_fp32, sample_latency_int8);
+
+  if (with_accuracy_layer) {
+    CompareAccuracy(int8_outputs, analysis_outputs, compared_idx);
+  }
+}
+
 void CompareNativeAndAnalysis(
     PaddlePredictor *native_pred, PaddlePredictor *analysis_pred,
     const std::vector<std::vector<PaddleTensor>> &inputs) {
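Most of the tester_helper.h churn is the same mechanical migration: bare-string enforce messages become typed error builders carrying the offending values. Distilled into one hedged sketch (the `outputs` vector is illustrative, not a name from the diff):

// The enforce idiom adopted throughout this commit: a comparison macro,
// a typed error category, and a printf-style message with actual values.
// `outputs` stands in for whichever container is being validated.
PADDLE_ENFORCE_GT(outputs.size(), 0,
                  platform::errors::InvalidArgument(
                      "The outputs vector is empty. Its size must be "
                      "bigger than 0, but received size = %d.",
                      outputs.size()));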
python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py

@@ -330,6 +330,17 @@ class FakeQAT2MkldnnINT8PerfPass(object):
         graph = self._remove_unused_var_nodes(graph)
         return graph
 
+    def apply_fp32(self, graph):
+        assert isinstance(graph,
+                          IrGraph), 'graph must be the instance of IrGraph.'
+
+        graph = self._gather_scales(graph)
+        graph = self._remove_fake_ops(graph)
+        graph = self._dequantize_weights(graph)
+        graph = self._optimize_fp32_graph(graph)
+        graph = self._remove_unused_var_nodes(graph)
+        return graph
+
     def _convert_scale2tensor(self, scale):
         tensor = core.LoDTensor()
         tensor.set(scale, core.CPUPlace())
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt

@@ -45,6 +45,12 @@ function(inference_qat2_int8_test target model_dir data_dir test_script use_mkld
           --qat2)
 endfunction()
 
+function(save_qat_model_test target qat_model_dir fp32_model_save_path int8_model_save_path test_script)
+  py_test(${target} SRCS ${test_script}
+          ARGS --qat_model_path ${qat_model_dir}
+               --fp32_model_save_path ${fp32_model_save_path}
+               --int8_model_save_path ${int8_model_save_path})
+endfunction()
+
 if(WIN32)
   list(REMOVE_ITEM TEST_OPS test_light_nas)

@@ -169,6 +175,13 @@ if(LINUX AND WITH_MKLDNN)
   endif()
   inference_qat2_int8_test(test_qat2_int8_mobilenetv1_mkldnn ${QAT2_MOBILENETV1_MODEL_DIR}/MobileNet_qat_perf ${DATASET_DIR} ${MKLDNN_QAT_TEST_FILE_PATH} true)
 
+  # Save qat2 fp32 model or qat2 int8 model
+  set(QAT2_INT8_SAVE_PATH "${QAT_DATA_DIR}/ResNet50_qat2_int8")
+  set(QAT2_FP32_SAVE_PATH "${QAT_DATA_DIR}/ResNet50_qat2_fp32")
+  set(SAVE_QAT2_MODEL_SCRIPT "${CMAKE_CURRENT_SOURCE_DIR}/save_qat_model.py")
+  save_qat_model_test(save_qat2_model_resnet50 ${QAT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float ${QAT2_FP32_SAVE_PATH} ${QAT2_INT8_SAVE_PATH} ${SAVE_QAT2_MODEL_SCRIPT} true)
+
 endif()
 
 # Since the test for QAT FP32 & INT8 comparison supports only testing on Linux
python/paddle/fluid/contrib/slim/tests/save_qat_model.py (new file, 0 → 100644)

# copyright (c) 2019 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
#     http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.

import unittest
import os
import sys
import argparse
import logging
import struct
import six
import numpy as np
import time
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import FakeQAT2MkldnnINT8PerfPass
from paddle.fluid import core


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--qat_model_path', type=str, default='', help='A path to a QAT model.')
    parser.add_argument(
        '--fp32_model_save_path',
        type=str,
        default='',
        help='Saved optimized fp32 model')
    parser.add_argument(
        '--int8_model_save_path',
        type=str,
        default='',
        help='Saved optimized and quantized INT8 model')

    test_args, args = parser.parse_known_args(namespace=unittest)
    return test_args, sys.argv[:1] + args


def transform_and_save_model(original_path, save_path, save_type):
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    inference_scope = fluid.executor.global_scope()
    with fluid.scope_guard(inference_scope):
        if os.path.exists(os.path.join(original_path, '__model__')):
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(original_path, exe)
        else:
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(original_path, exe,
                                                            'model', 'params')

        transform_to_mkldnn_int8_pass = FakeQAT2MkldnnINT8PerfPass(
            _scope=inference_scope, _place=place, _core=core)

        graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
        if save_type == 'FP32':
            graph = transform_to_mkldnn_int8_pass.apply_fp32(graph)
        elif save_type == 'INT8':
            graph = transform_to_mkldnn_int8_pass.apply(graph)
        inference_program = graph.to_program()
        with fluid.scope_guard(inference_scope):
            fluid.io.save_inference_model(save_path, feed_target_names,
                                          fetch_targets, exe,
                                          inference_program)
        print("Success! Transformed QAT_{0} model can be found at {1}\n".format(
            save_type, save_path))


if __name__ == '__main__':
    global test_args
    test_args, remaining_args = parse_args()
    if test_args.fp32_model_save_path:
        transform_and_save_model(test_args.qat_model_path,
                                 test_args.fp32_model_save_path, 'FP32')
    if test_args.int8_model_save_path:
        transform_and_save_model(test_args.qat_model_path,
                                 test_args.int8_model_save_path, 'INT8')
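For reference, a hedged invocation of the new script (the paths are placeholders; the flags are the ones defined in parse_args above, and the save_qat_model_test CMake function passes the same three):

python save_qat_model.py \
    --qat_model_path /path/to/ResNet50_qat_perf/float \
    --fp32_model_save_path /path/to/ResNet50_qat2_fp32 \
    --int8_model_save_path /path/to/ResNet50_qat2_int8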