Commit ddcd1b53 (unverified)
Authored Oct 13, 2020 by joanna.wozna.intel; committed by GitHub on Oct 13, 2020

Add bfloat16 resnet50 test (#27755)

Parent: 6da7a745

Showing 9 changed files with 267 additions and 125 deletions (+267 -125)
paddle/fluid/inference/analysis/argument.h                                          +4    -0
paddle/fluid/inference/api/analysis_config.cc                                       +5    -0
paddle/fluid/inference/api/analysis_predictor.cc                                    +4    -0
paddle/fluid/inference/api/paddle_analysis_config.h                                 +9    -0
paddle/fluid/inference/tests/api/CMakeLists.txt                                     +26   -0
paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc   +49   -0
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc       +2    -119
paddle/fluid/inference/tests/api/tester_helper.h                                    +167  -6
paddle/fluid/pybind/inference_api.cc                                                +1    -0
paddle/fluid/inference/analysis/argument.h

@@ -178,6 +178,10 @@ struct Argument {
   // Scales for variables to be quantized
   DECL_ARGUMENT_FIELD(quant_var_scales, QuantVarScales, VarQuantScale);

+  // A set of op types to enable their bfloat16 kernels
+  DECL_ARGUMENT_FIELD(bfloat16_enabled_op_types, Bfloat16EnabledOpTypes,
+                      std::unordered_set<std::string>);
+
 #endif

   // Passed from config.
paddle/fluid/inference/api/analysis_config.cc

@@ -125,6 +125,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_mkldnn_);
   CP_MEMBER(mkldnn_enabled_op_types_);
   CP_MEMBER(mkldnn_cache_capacity_);
+  // Bfloat16 related.
+  CP_MEMBER(use_mkldnn_bfloat16_);
+  CP_MEMBER(bfloat16_enabled_op_types_);
   // Quantization related.
   CP_MEMBER(use_mkldnn_quantizer_);
   CP_MEMBER(mkldnn_quantizer_config_);

@@ -417,6 +420,8 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_mkldnn_quantizer_;
   ss << use_mkldnn_bfloat16_;
+  for (auto &item : bfloat16_enabled_op_types_) ss << item;
+  ss << ";";
   ss << model_from_memory_;
   ss << with_profile_;
paddle/fluid/inference/api/analysis_predictor.cc

@@ -501,6 +501,10 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetQuantizeExcludedOpIds(
         config_.mkldnn_quantizer_config()->excluded_op_ids());
   }
+  if (config_.use_mkldnn_bfloat16_) {
+    LOG(INFO) << "Bfloat16 is enabled";
+    argument_.SetBfloat16EnabledOpTypes(config_.bfloat16_enabled_op_types_);
+  }
 #endif

   auto passes = config_.pass_builder()->AllPasses();
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -414,6 +414,14 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; }

+  /// \brief Specify the operator type list to use Bfloat16 acceleration.
+  ///
+  /// \param op_list The operator type list.
+  ///
+  void SetBfloat16Op(std::unordered_set<std::string> op_list) {
+    bfloat16_enabled_op_types_ = op_list;
+  }
+
   ///
   /// \brief A boolean state telling whether the thread local CUDA stream is
   /// enabled.

@@ -606,6 +614,7 @@ struct PD_INFER_DECL AnalysisConfig {
   bool use_mkldnn_quantizer_{false};
   std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
   bool use_mkldnn_bfloat16_{false};
+  std::unordered_set<std::string> bfloat16_enabled_op_types_;
   // If the config is already used on a predictor, it becomes invalid.
   // Any config can only be used with one predictor.
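Taken together with the existing EnableMkldnnBfloat16() switch, the new SetBfloat16Op() setter limits bfloat16 execution to a chosen set of operator types. Below is a minimal, illustrative C++ sketch (not part of this commit's diff; the helper name and model path are placeholders) of how the two calls are meant to be combined on an AnalysisConfig, mirroring what the new tester does:

  #include <string>
  #include <unordered_set>

  #include "paddle/fluid/inference/api/paddle_analysis_config.h"

  // Hypothetical helper: builds a CPU config that runs only conv2d in bfloat16.
  paddle::AnalysisConfig MakeBf16Config(const std::string &model_dir) {
    paddle::AnalysisConfig cfg;
    cfg.SetModel(model_dir);        // placeholder model directory
    cfg.DisableGpu();               // bfloat16 here is a CPU (MKL-DNN) path
    cfg.EnableMKLDNN();
    cfg.EnableMkldnnBfloat16();     // turn on bfloat16 mode
    cfg.SetBfloat16Op({"conv2d"});  // restrict bfloat16 to conv2d, as the new test does
    return cfg;
  }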
paddle/fluid/inference/tests/api/CMakeLists.txt

@@ -21,6 +21,12 @@ function(download_int8_data install_dir data_file)
     endif()
 endfunction()

+function(download_bfloat16_data install_dir data_file)
+    if (NOT EXISTS ${install_dir}/${data_file})
+      inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/bfloat16 ${data_file})
+    endif()
+endfunction()
+
 function(download_GRU_data install_dir data_file)
     if (NOT EXISTS ${install_dir}/${data_file})
       inference_download_and_uncompress(${install_dir} ${INFERENCE_URL}/gru ${data_file})

@@ -69,6 +75,16 @@ function(inference_analysis_api_int8_test_run_custom_warmup_batch_size TARGET_NA
   inference_analysis_api_int8_test_run(${TARGET_NAME} ${test_binary} ${model_dir} ${data_path})
 endfunction()

+function(inference_analysis_api_bfloat16_test_run TARGET_NAME test_binary model_dir data_path)
+  inference_analysis_test_run(${TARGET_NAME}
+          COMMAND ${test_binary}
+          ARGS --infer_model=${model_dir}/model
+               --infer_data=${data_path}
+               --batch_size=50
+               --paddle_num_threads=${CPU_NUM_THREADS_ON_CI}
+               --iterations=2)
+endfunction()
+
 function(inference_analysis_api_object_dection_int8_test_run TARGET_NAME test_binary model_dir data_path)
   inference_analysis_test_run(${TARGET_NAME}
          COMMAND ${test_binary}

@@ -346,6 +362,16 @@ if(WITH_MKLDNN)
     download_int8_data(${INT8_GOOGLENET_MODEL_DIR} "GoogleNet_int8_model.tar.gz")
     inference_analysis_api_int8_test_run_custom_warmup_batch_size(test_analyzer_int8_googlenet ${INT8_IMG_CLASS_TEST_APP} ${INT8_GOOGLENET_MODEL_DIR} ${IMAGENET_DATA_PATH} 10)

+    ### BFLOAT16 tests
+
+    # build test binary to be used in subsequent tests
+    set(BF16_IMG_CLASS_TEST_APP "test_analyzer_bfloat16_image_classification")
+    set(BF16_IMG_CLASS_TEST_APP_SRC "analyzer_bfloat16_image_classification_tester.cc")
+    inference_analysis_api_test_build(${BF16_IMG_CLASS_TEST_APP} ${BF16_IMG_CLASS_TEST_APP_SRC})
+
+    # resnet50 bfloat16
+    inference_analysis_api_bfloat16_test_run(test_analyzer_bfloat16_resnet50 ${BF16_IMG_CLASS_TEST_APP} ${INT8_RESNET50_MODEL_DIR} ${IMAGENET_DATA_PATH})
+
     ### Object detection models
     set(PASCALVOC_DATA_PATH "${INT8_DATA_DIR}/pascalvoc_val_head_300.bin")
     set(INT8_OBJ_DETECT_TEST_APP "test_analyzer_int8_object_detection")
paddle/fluid/inference/tests/api/analyzer_bfloat16_image_classification_tester.cc (new file, mode 100644)

+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <fstream>
+#include <iostream>
+
+#include "paddle/fluid/inference/api/paddle_analysis_config.h"
+#include "paddle/fluid/inference/tests/api/tester_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+void SetConfig(AnalysisConfig *cfg) {
+  cfg->SetModel(FLAGS_infer_model);
+  cfg->DisableGpu();
+  cfg->SwitchIrOptim();
+  cfg->SwitchSpecifyInputNames();
+  cfg->SetCpuMathLibraryNumThreads(FLAGS_num_threads);
+  cfg->EnableMKLDNN();
+}
+
+TEST(Analyzer_int8_image_classification, bfloat16) {
+  AnalysisConfig cfg;
+  SetConfig(&cfg);
+
+  AnalysisConfig q_cfg;
+  SetConfig(&q_cfg);
+
+  // read data from file and prepare batches with test data
+  std::vector<std::vector<PaddleTensor>> input_slots_all;
+  SetInputs(&input_slots_all);
+
+  q_cfg.SwitchIrDebug();
+  q_cfg.EnableMkldnnBfloat16();
+  q_cfg.SetBfloat16Op({"conv2d"});
+
+  CompareBFloat16AndAnalysis(&cfg, &q_cfg, input_slots_all);
+}
+
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc

@@ -30,123 +30,6 @@ void SetConfig(AnalysisConfig *cfg) {
   cfg->EnableMKLDNN();
 }

-template <typename T>
-class TensorReader {
- public:
-  TensorReader(std::ifstream &file, size_t beginning_offset,
-               std::vector<int> shape, std::string name)
-      : file_(file), position_(beginning_offset), shape_(shape), name_(name) {
-    numel_ = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
-                             std::multiplies<size_t>());
-  }
-
-  PaddleTensor NextBatch() {
-    PaddleTensor tensor;
-    tensor.name = name_;
-    tensor.shape = shape_;
-    tensor.dtype = GetPaddleDType<T>();
-    tensor.data.Resize(numel_ * sizeof(T));
-
-    file_.seekg(position_);
-    file_.read(static_cast<char *>(tensor.data.data()), numel_ * sizeof(T));
-    position_ = file_.tellg();
-
-    if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream";
-    if (file_.fail())
-      throw std::runtime_error(name_ + ": failed reading file.");
-
-    return tensor;
-  }
-
- protected:
-  std::ifstream &file_;
-  size_t position_;
-  std::vector<int> shape_;
-  std::string name_;
-  size_t numel_;
-};
-
-std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
-    const std::vector<std::vector<PaddleTensor>> &test_data,
-    int num_images = FLAGS_warmup_batch_size) {
-  int test_data_batch_size = test_data[0][0].shape[0];
-  auto iterations = test_data.size();
-  auto all_test_data_size = iterations * test_data_batch_size;
-  PADDLE_ENFORCE_LE(static_cast<size_t>(num_images), all_test_data_size,
-                    platform::errors::InvalidArgument(
-                        "The requested quantization warmup data size must be "
-                        "lower or equal to the test data size. But received"
-                        "warmup size is %d and test data size is %d. Please "
-                        "use --warmup_batch_size parameter to set smaller "
-                        "warmup batch size.",
-                        num_images, all_test_data_size));
-
-  PaddleTensor images;
-  images.name = "image";
-  images.shape = {num_images, 3, 224, 224};
-  images.dtype = PaddleDType::FLOAT32;
-  images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224);
-
-  PaddleTensor labels;
-  labels.name = "label";
-  labels.shape = {num_images, 1};
-  labels.dtype = PaddleDType::INT64;
-  labels.data.Resize(sizeof(int64_t) * num_images);
-
-  for (int i = 0; i < num_images; i++) {
-    auto batch = i / test_data_batch_size;
-    auto element_in_batch = i % test_data_batch_size;
-    std::copy_n(static_cast<float *>(test_data[batch][0].data.data()) +
-                    element_in_batch * 3 * 224 * 224,
-                3 * 224 * 224,
-                static_cast<float *>(images.data.data()) + i * 3 * 224 * 224);
-
-    std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
-                    element_in_batch,
-                1, static_cast<int64_t *>(labels.data.data()) + i);
-  }
-
-  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(2);
-  (*warmup_data)[0] = std::move(images);
-  (*warmup_data)[1] = std::move(labels);
-  return warmup_data;
-}
-
-void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
-              int32_t batch_size = FLAGS_batch_size) {
-  std::ifstream file(FLAGS_infer_data, std::ios::binary);
-  if (!file) {
-    FAIL() << "Couldn't open file: " << FLAGS_infer_data;
-  }
-
-  int64_t total_images{0};
-  file.read(reinterpret_cast<char *>(&total_images), sizeof(total_images));
-  LOG(INFO) << "Total images in file: " << total_images;
-
-  std::vector<int> image_batch_shape{batch_size, 3, 224, 224};
-  std::vector<int> label_batch_shape{batch_size, 1};
-  auto images_offset_in_file = static_cast<size_t>(file.tellg());
-  auto labels_offset_in_file =
-      images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;
-
-  TensorReader<float> image_reader(file, images_offset_in_file,
-                                   image_batch_shape, "image");
-  TensorReader<int64_t> label_reader(file, labels_offset_in_file,
-                                     label_batch_shape, "label");
-
-  auto iterations_max = total_images / batch_size;
-  auto iterations = iterations_max;
-  if (FLAGS_iterations > 0 && FLAGS_iterations < iterations_max) {
-    iterations = FLAGS_iterations;
-  }
-  for (auto i = 0; i < iterations; i++) {
-    auto images = image_reader.NextBatch();
-    auto labels = label_reader.NextBatch();
-    inputs->emplace_back(
-        std::vector<PaddleTensor>{std::move(images), std::move(labels)});
-  }
-}
-
 TEST(Analyzer_int8_image_classification, quantization) {
   AnalysisConfig cfg;
   SetConfig(&cfg);

@@ -156,13 +39,13 @@ TEST(Analyzer_int8_image_classification, quantization) {
   // read data from file and prepare batches with test data
   std::vector<std::vector<PaddleTensor>> input_slots_all;
-  SetInput(&input_slots_all);
+  SetInputs(&input_slots_all);

   if (FLAGS_enable_int8) {
     // prepare warmup batch from input data read earlier
     // warmup batch size can be different than batch size
     std::shared_ptr<std::vector<PaddleTensor>> warmup_data =
-        GetWarmupData(input_slots_all);
+        paddle::inference::GetWarmupData(input_slots_all);

     // configure quantizer
     q_cfg.EnableMkldnnQuantizer();
paddle/fluid/inference/tests/api/tester_helper.h

@@ -17,10 +17,12 @@
 #include <gtest/gtest.h>

 #include <algorithm>
+#include <functional>
 #include <memory>
 #include <string>
 #include <thread>  // NOLINT
 #include <unordered_map>
+#include <utility>
 #include <vector>
 #ifdef WITH_GPERFTOOLS
 #include <gperftools/profiler.h>

@@ -48,6 +50,7 @@ DEFINE_bool(ernie_large, false, "Test ernie large");
 DEFINE_bool(with_accuracy_layer, true,
             "Calculate the accuracy while label is in the input");
 DEFINE_bool(enable_fp32, true, "Enable FP32 type prediction");
+DEFINE_bool(enable_bf16, true, "Enable BF16 type prediction");
 DEFINE_bool(enable_int8, true, "Enable INT8 type prediction");
 DEFINE_int32(warmup_batch_size, 100, "batch size for quantization warmup");
 // setting iterations to 0 means processing the whole dataset

@@ -124,6 +127,123 @@ class Barrier {
   std::size_t _count;
 };

+template <typename T>
+class TensorReader {
+ public:
+  TensorReader(std::ifstream &file, size_t beginning_offset,
+               std::vector<int> shape, std::string name)
+      : file_(file), position_(beginning_offset), shape_(shape), name_(name) {
+    numel_ = std::accumulate(shape_.begin(), shape_.end(), size_t{1},
+                             std::multiplies<size_t>());
+  }
+
+  PaddleTensor NextBatch() {
+    PaddleTensor tensor;
+    tensor.name = name_;
+    tensor.shape = shape_;
+    tensor.dtype = GetPaddleDType<T>();
+    tensor.data.Resize(numel_ * sizeof(T));
+
+    file_.seekg(position_);
+    file_.read(static_cast<char *>(tensor.data.data()), numel_ * sizeof(T));
+    position_ = file_.tellg();
+
+    if (file_.eof()) LOG(ERROR) << name_ << ": reached end of stream";
+    if (file_.fail())
+      throw std::runtime_error(name_ + ": failed reading file.");
+
+    return tensor;
+  }
+
+ protected:
+  std::ifstream &file_;
+  size_t position_;
+  std::vector<int> shape_;
+  std::string name_;
+  size_t numel_;
+};
+
+std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
+    const std::vector<std::vector<PaddleTensor>> &test_data,
+    int num_images = FLAGS_warmup_batch_size) {
+  int test_data_batch_size = test_data[0][0].shape[0];
+  auto iterations = test_data.size();
+  auto all_test_data_size = iterations * test_data_batch_size;
+  PADDLE_ENFORCE_LE(static_cast<size_t>(num_images), all_test_data_size,
+                    platform::errors::InvalidArgument(
+                        "The requested quantization warmup data size must be "
+                        "lower or equal to the test data size. But received"
+                        "warmup size is %d and test data size is %d. Please "
+                        "use --warmup_batch_size parameter to set smaller "
+                        "warmup batch size.",
+                        num_images, all_test_data_size));
+
+  PaddleTensor images;
+  images.name = "image";
+  images.shape = {num_images, 3, 224, 224};
+  images.dtype = PaddleDType::FLOAT32;
+  images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224);
+
+  PaddleTensor labels;
+  labels.name = "label";
+  labels.shape = {num_images, 1};
+  labels.dtype = PaddleDType::INT64;
+  labels.data.Resize(sizeof(int64_t) * num_images);
+
+  for (int i = 0; i < num_images; i++) {
+    auto batch = i / test_data_batch_size;
+    auto element_in_batch = i % test_data_batch_size;
+    std::copy_n(static_cast<float *>(test_data[batch][0].data.data()) +
+                    element_in_batch * 3 * 224 * 224,
+                3 * 224 * 224,
+                static_cast<float *>(images.data.data()) + i * 3 * 224 * 224);
+
+    std::copy_n(static_cast<int64_t *>(test_data[batch][1].data.data()) +
+                    element_in_batch,
+                1, static_cast<int64_t *>(labels.data.data()) + i);
+  }
+
+  auto warmup_data = std::make_shared<std::vector<PaddleTensor>>(2);
+  (*warmup_data)[0] = std::move(images);
+  (*warmup_data)[1] = std::move(labels);
+  return warmup_data;
+}
+
+void SetInputs(std::vector<std::vector<PaddleTensor>> *inputs,
+               int32_t batch_size = FLAGS_batch_size) {
+  std::ifstream file(FLAGS_infer_data, std::ios::binary);
+  if (!file) {
+    FAIL() << "Couldn't open file: " << FLAGS_infer_data;
+  }
+
+  int64_t total_images{0};
+  file.read(reinterpret_cast<char *>(&total_images), sizeof(total_images));
+  LOG(INFO) << "Total images in file: " << total_images;
+
+  std::vector<int> image_batch_shape{batch_size, 3, 224, 224};
+  std::vector<int> label_batch_shape{batch_size, 1};
+  auto images_offset_in_file = static_cast<size_t>(file.tellg());
+  auto labels_offset_in_file =
+      images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;
+
+  TensorReader<float> image_reader(file, images_offset_in_file,
+                                   image_batch_shape, "image");
+  TensorReader<int64_t> label_reader(file, labels_offset_in_file,
+                                     label_batch_shape, "label");
+
+  auto iterations_max = total_images / batch_size;
+  auto iterations = iterations_max;
+  if (FLAGS_iterations > 0 && FLAGS_iterations < iterations_max) {
+    iterations = FLAGS_iterations;
+  }
+  for (auto i = 0; i < iterations; i++) {
+    auto images = image_reader.NextBatch();
+    auto labels = label_reader.NextBatch();
+    inputs->emplace_back(
+        std::vector<PaddleTensor>{std::move(images), std::move(labels)});
+  }
+}
+
 // Compare result between two PaddleTensor
 void CompareResult(const std::vector<PaddleTensor> &outputs,
                    const std::vector<PaddleTensor> &ref_outputs) {

@@ -555,10 +675,10 @@ void SummarizePerformance(const char *title, float sample) {
             << " ms";
 }

-void SummarizePerformance(float sample_latency_fp32,
-                          float sample_latency_int8) {
-  if (FLAGS_enable_fp32) SummarizePerformance("FP32", sample_latency_fp32);
-  if (FLAGS_enable_int8) SummarizePerformance("INT8", sample_latency_int8);
+void SummarizePerformance(const char *title_fp32, float sample_latency_fp32,
+                          const char *title, float sample_latency) {
+  if (FLAGS_enable_fp32) SummarizePerformance(title_fp32, sample_latency_fp32);
+  if (FLAGS_enable_int8) SummarizePerformance(title, sample_latency);
 }

 float CompareAccuracyOne(

@@ -708,11 +828,51 @@ void CompareQuantizedAndAnalysis(
     TestOneThreadPrediction(qcfg, inputs, &quantized_outputs, true,
                             VarType::INT8, &sample_latency_int8);
   }
-  SummarizePerformance(sample_latency_fp32, sample_latency_int8);
+  SummarizePerformance("FP32", sample_latency_fp32, "INT8",
+                       sample_latency_int8);
   CompareAccuracy(quantized_outputs, analysis_outputs, compared_idx);
 }

+void CompareBFloat16AndAnalysis(
+    const AnalysisConfig *config, const AnalysisConfig *qconfig,
+    const std::vector<std::vector<PaddleTensor>> &inputs,
+    const int compared_idx = 1) {
+  PADDLE_ENFORCE_EQ(
+      inputs[0][0].shape[0], FLAGS_batch_size,
+      platform::errors::InvalidArgument(
+          "Input data has to be packed batch by batch. The batchsize is set to "
+          "%d, but the real input is packed with batchsize = %d",
+          FLAGS_batch_size, inputs[0][0].shape[0]));
+  LOG(INFO) << "FP32 & BF16 prediction run: batch_size " << FLAGS_batch_size;
+
+  LOG(INFO) << "--- FP32 prediction start ---";
+  auto *cfg = reinterpret_cast<const PaddlePredictor::Config *>(config);
+  PrintConfig(cfg, true);
+  std::vector<std::vector<PaddleTensor>> analysis_outputs;
+  float sample_latency_fp32{-1};
+
+  if (FLAGS_enable_fp32) {
+    TestOneThreadPrediction(cfg, inputs, &analysis_outputs, true, VarType::FP32,
+                            &sample_latency_fp32);
+  }
+
+  LOG(INFO) << "--- BF16 prediction start ---";
+  auto *qcfg = reinterpret_cast<const PaddlePredictor::Config *>(qconfig);
+  PrintConfig(qcfg, true);
+  std::vector<std::vector<PaddleTensor>> bf16_outputs;
+  float sample_latency_bf16{-1};
+
+  if (FLAGS_enable_bf16) {
+    TestOneThreadPrediction(qcfg, inputs, &bf16_outputs, true, VarType::FP32,
+                            &sample_latency_bf16);
+  }
+  SummarizePerformance("FP32", sample_latency_fp32, "BF16",
+                       sample_latency_bf16);
+
+  CompareAccuracy(bf16_outputs, analysis_outputs, compared_idx);
+}
+
 void CompareAnalysisAndAnalysis(
     const AnalysisConfig *config1, const AnalysisConfig *config2,
     const std::vector<std::vector<PaddleTensor>> &inputs,

@@ -749,7 +909,8 @@ void CompareAnalysisAndAnalysis(
     TestOneThreadPrediction(cfg2, inputs, &int8_outputs, true, VarType::INT8,
                             &sample_latency_int8);
   }
-  SummarizePerformance(sample_latency_fp32, sample_latency_int8);
+  SummarizePerformance("FP32", sample_latency_fp32, "INT8",
+                       sample_latency_int8);
   if (with_accuracy_layer) {
     CompareAccuracy(int8_outputs, analysis_outputs, compared_idx);
   }
paddle/fluid/pybind/inference_api.cc

@@ -502,6 +502,7 @@ void BindAnalysisConfig(py::module *m) {
            py::return_value_policy::reference)
       .def("set_mkldnn_cache_capacity", &AnalysisConfig::SetMkldnnCacheCapacity,
            py::arg("capacity") = 0)
+      .def("set_bfloat16_op", &AnalysisConfig::SetBfloat16Op)
 #endif
       .def("set_mkldnn_op", &AnalysisConfig::SetMKLDNNOp)
       .def("set_model_buffer", &AnalysisConfig::SetModelBuffer)