Commit 23231af8 (unverified)
Authored by cc on Apr 10, 2020; committed via GitHub on Apr 10, 2020
Optimize weight quantization (#3374)

* Optimize weight quantization, test=develop
Parent: 40a31442

Showing 6 changed files with 213 additions and 115 deletions (+213 −115)
lite/api/benchmark.cc                                   +91  -27
lite/api/light_api.cc                                   +68  -47
lite/core/mir/fusion/conv_bn_fuser.cc                    +1   -2
lite/core/mir/weight_quantization_preprocess_pass.cc    +30   -4
lite/core/mir/weight_quantization_preprocess_pass.h      +3   -2
lite/tools/benchmark.sh                                 +20  -33
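For orientation before the per-file diffs (this note and sketch are the editor's reading of the change, not text from the commit): the commit extends post-training weight quantization support. With the abs_max method, conv2d/depthwise_conv2d/fc/mul weights are stored as int8 or int16 together with per-output-channel scales, and LightPredictor::DequantizeWeight() restores fp32 weights at load time by multiplying each stored integer by its channel's scale. A minimal round-trip sketch of that idea, under those assumptions:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Illustrative abs_max quantization of one output channel (not Paddle-Lite
// code): scale = max|w| / 127 for int8, and dequantization is q[i] * scale,
// which is what the loader's dequant loops compute.
void QuantDequantChannel(const std::vector<float>& w,
                         std::vector<int8_t>* q,
                         float* scale) {
  float max_abs = 0.f;
  for (float v : w) max_abs = std::max(max_abs, std::fabs(v));
  *scale = (max_abs > 0.f) ? (max_abs / 127.f) : 1.f;
  q->resize(w.size());
  for (size_t i = 0; i < w.size(); ++i) {
    (*q)[i] = static_cast<int8_t>(std::round(w[i] / *scale));
  }
}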
lite/api/benchmark.cc

@@ -27,6 +27,9 @@
 #include "lite/utils/cp_logging.h"
 #include "lite/utils/string.h"
 
+DEFINE_string(optimized_model_path,
+              "",
+              "the path of the model that is optimized by opt.");
 DEFINE_string(model_dir,
               "",
               "the path of the model, the model and param files is under "
@@ -61,10 +64,7 @@ DEFINE_int32(threads, 1, "threads num");
 DEFINE_string(result_filename,
               "result.txt",
               "save the inference time to the file.");
-DEFINE_bool(run_model_optimize,
-            false,
-            "if set true, apply model_optimize_tool to "
-            "model and use optimized model to test. ");
+DEFINE_bool(show_output, false, "Wether to show the output in shell.");
 
 namespace paddle {
 namespace lite_api {
@@ -100,15 +100,23 @@ void OutputOptModel(const std::string& save_optimized_model_dir) {
   LOG(INFO) << "Save optimized model to " << save_optimized_model_dir;
 }
 
+int64_t ShapeProduction(const std::vector<int64_t>& shape) {
+  int64_t num = 1;
+  for (auto i : shape) {
+    num *= i;
+  }
+  return num;
+}
+
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 void Run(const std::vector<int64_t>& input_shape,
-         const std::string& model_dir,
+         const std::string& model_path,
          const std::string model_name) {
   // set config and create predictor
   lite_api::MobileConfig config;
   config.set_threads(FLAGS_threads);
   config.set_power_mode(static_cast<PowerMode>(FLAGS_power_mode));
-  config.set_model_from_file(model_dir + ".nb");
+  config.set_model_from_file(model_path);
 
   auto predictor = lite_api::CreatePaddlePredictor(config);
@@ -116,10 +124,7 @@ void Run(const std::vector<int64_t>& input_shape,
   auto input_tensor = predictor->GetInput(0);
   input_tensor->Resize(input_shape);
   auto input_data = input_tensor->mutable_data<float>();
-  int input_num = 1;
-  for (size_t i = 0; i < input_shape.size(); ++i) {
-    input_num *= input_shape[i];
-  }
+  int64_t input_num = ShapeProduction(input_shape);
   if (FLAGS_input_img_path.empty()) {
     for (int i = 0; i < input_num; ++i) {
       input_data[i] = 1.f;
@@ -167,26 +172,73 @@ void Run(const std::vector<int64_t>& input_shape,
   ofs << "average = " << std::setw(12) << avg_res;
   ofs << std::endl;
   ofs.close();
+
+  if (FLAGS_show_output) {
+    auto out_tensor = predictor->GetOutput(0);
+    auto* out_data = out_tensor->data<float>();
+    int64_t output_num = ShapeProduction(out_tensor->shape());
+    float max_value = out_data[0];
+    int max_index = 0;
+    for (int i = 0; i < output_num; i++) {
+      if (max_value < out_data[i]) {
+        max_value = out_data[i];
+        max_index = i;
+      }
+    }
+    LOG(INFO) << "max_value:" << max_value;
+    LOG(INFO) << "max_index:" << max_index;
+  }
 }
 #endif
 
 }  // namespace lite_api
 }  // namespace paddle
 
+void print_usage() {
+  std::string help_info =
+      "Usage: \n"
+      "./benchmark_bin \n"
+      "  --optimized_model_path (the path of the model that is optimized\n"
+      "    by opt.) type: string \n"
+      "  --model_dir (the path of the model that is not optimized by opt,\n"
+      "    the model and param files is under model_dir.) type: string \n"
+      "  --model_filename (the filename of model file. When the model is\n"
+      "    combined formate, please set model_file. Otherwise, it is not\n"
+      "    necessary to set it.) type: string \n"
+      "  --param_filename (the filename of param file, set param_file when\n"
+      "    the model is combined formate. Otherwise, it is not necessary\n"
+      "    to set it.) type: string \n"
+      "  --input_shape (set input shapes according to the model, separated by\n"
+      "    colon and comma, such as 1,3,244,244) type: string\n"
+      "    default: 1,3,224,224 \n"
+      "  --input_img_path (the path of input image, if not set\n"
+      "    input_img_path, the input will be 1.0.) type: string \n"
+      "  --power_mode (arm power mode: 0 for big cluster, 1 for little\n"
+      "    cluster, 2 for all cores, 3 for no bind) type: int32 default: 3\n"
+      "  --repeats (repeats times) type: int32 default: 1 \n"
+      "  --result_filename (save the inference time to the file.) type: \n"
+      "    string default: result.txt \n"
+      "  --threads (threads num) type: int32 default: 1 \n"
+      "  --warmup (warmup times) type: int32 default: 0 \n"
+      "Note that: \n"
+      "  If load the optimized model, set optimized_model_path, or set\n"
+      "    model_dir, model_filename and param_filename according to the\n"
+      "    model. \n";
+  LOG(INFO) << help_info;
+}
+
 int main(int argc, char** argv) {
   // Check inputs
   gflags::ParseCommandLineFlags(&argc, &argv, true);
-  if (FLAGS_model_dir == "") {
-    LOG(INFO) << "Please run ./benchmark_bin --help to obtain usage.";
+
+  bool is_opt_model = (FLAGS_optimized_model_path != "");
+  bool is_origin_model = (FLAGS_model_dir != "");
+  if (!is_origin_model && !is_opt_model) {
+    LOG(INFO) << "Input error, the model path should not be empty.\n";
+    print_usage();
     exit(0);
   }
 
-  if (FLAGS_model_dir.back() == '/') {
-    FLAGS_model_dir.pop_back();
-  }
-  std::size_t found = FLAGS_model_dir.find_last_of("/");
-  std::string model_name = FLAGS_model_dir.substr(found + 1);
-  std::string save_optimized_model_dir = FLAGS_model_dir + "_opt2";
-
+  // Get input shape
   auto get_shape = [](const std::string& str_shape) -> std::vector<int64_t> {
     std::vector<int64_t> shape;
     std::string tmp_str = str_shape;
@@ -202,19 +254,31 @@ int main(int argc, char** argv) {
     }
     return shape;
   };
-
   std::vector<int64_t> input_shape = get_shape(FLAGS_input_shape);
 
-  // Output optimized model if needed
-  if (FLAGS_run_model_optimize) {
-    paddle::lite_api::OutputOptModel(save_optimized_model_dir);
+  // Get model_name and run_model_path
+  std::string model_name;
+  std::string run_model_path;
+  if (is_origin_model) {
+    if (FLAGS_model_dir.back() == '/') {
+      FLAGS_model_dir.pop_back();
+    }
+    std::size_t found = FLAGS_model_dir.find_last_of("/");
+    model_name = FLAGS_model_dir.substr(found + 1);
+    std::string optimized_model_path = FLAGS_model_dir + "_opt2";
+    paddle::lite_api::OutputOptModel(optimized_model_path);
+    run_model_path = optimized_model_path + ".nb";
+  } else {
+    size_t found1 = FLAGS_optimized_model_path.find_last_of("/");
+    size_t found2 = FLAGS_optimized_model_path.find_last_of(".");
+    size_t len = found2 - found1 - 1;
+    model_name = FLAGS_optimized_model_path.substr(found1 + 1, len);
+    run_model_path = FLAGS_optimized_model_path;
   }
 
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-  // Run inference using optimized model
-  std::string run_model_dir =
-      FLAGS_run_model_optimize ? save_optimized_model_dir : FLAGS_model_dir;
-  paddle::lite_api::Run(input_shape, run_model_dir, model_name);
+  // Run test
+  paddle::lite_api::Run(input_shape, run_model_path, model_name);
 #endif
   return 0;
 }
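With the change above, benchmark_bin accepts either --model_dir (a raw model, converted on the fly by OutputOptModel and then loaded as <model_dir>_opt2.nb) or --optimized_model_path (an already converted .nb file loaded directly). The sketch below is a minimal, illustrative version of the load-and-run flow the benchmark exercises; the API calls (MobileConfig, set_model_from_file, CreatePaddlePredictor, GetInput/GetOutput, mutable_data) are the ones visible in the diff, while the header name, model path, and input shape are placeholders.

#include <cstdint>
#include <vector>

#include "paddle_api.h"  // assumed Paddle-Lite API header; adjust to the build layout

void RunOptimizedModelOnce() {
  using namespace paddle::lite_api;

  // Load an opt-converted .nb model directly, as --optimized_model_path allows.
  MobileConfig config;
  config.set_threads(1);
  config.set_model_from_file("/data/local/tmp/model.nb");  // placeholder path

  auto predictor = CreatePaddlePredictor(config);

  // Fill the input with ones, mirroring the benchmark's default when no image is given.
  auto input = predictor->GetInput(0);
  input->Resize({1, 3, 224, 224});  // placeholder shape
  float* data = input->mutable_data<float>();
  for (int64_t i = 0; i < 1 * 3 * 224 * 224; ++i) data[i] = 1.f;

  predictor->Run();

  // --show_output reports the arg-max of output 0; reading it looks like this.
  auto output = predictor->GetOutput(0);
  const float* out_data = output->data<float>();
  (void)out_data;
}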
lite/api/light_api.cc

@@ -29,7 +29,10 @@ void LightPredictor::Build(const std::string& lite_model_file,
     LoadModelNaiveFromFile(lite_model_file, scope_.get(), &cpp_program_desc_);
   }
 
+  // For weight quantization of post training, load the int8/16 weights
+  // for optimized model, and dequant it to fp32.
   DequantizeWeight();
+
   BuildRuntimeProgram(cpp_program_desc_);
   PrepareFeedFetch();
 }
@@ -182,58 +185,76 @@ void LightPredictor::BuildRuntimeProgram(const cpp::ProgramDesc& prog) {
 }
 
 void LightPredictor::DequantizeWeight() {
-#define PROCESS_CONV2D_DATA()                                   \
-  for (int64_t i = 0; i < h; ++i) {                             \
-    for (int64_t j = 0; j < w; ++j) {                           \
-      fp_data[i * w + j] = scale_list[i] * int_data[i * w + j]; \
-    }                                                           \
-  }
+#define PROCESS_CONV2D_DATA()                                             \
+  for (int64_t i = 0; i < ch; ++i) {                                      \
+    for (int64_t j = 0; j < offset; ++j) {                                \
+      fp_data[i * offset + j] = scale_list[i] * int_data[i * offset + j]; \
+    }                                                                     \
+  }
 
-#define PROCESS_FC_DATA()                             \
-  for (int i = 0; i < input_tensor->numel(); i++) {   \
-    *fp_data = scale_list[0] * (*int_data);           \
-    ++fp_data;                                        \
-    ++int_data;                                       \
-  }
+#define PROCESS_FC_DATA()                                               \
+  for (int64_t i = 0; i < chin; i++) {                                  \
+    for (int64_t j = 0; j < chout; j++) {                               \
+      fp_data[i * chout + j] = scale_list[j] * int_data[i * chout + j]; \
+    }                                                                   \
+  }
+
+  auto is_weight_quantized_op = [](const cpp::OpDesc* op_desc) {
+    bool result = false;
+    if (op_desc->HasAttr("quantization_type")) {
+      std::string type = op_desc->GetAttr<std::string>("quantization_type");
+      result = (type == "post_weight_abs_max") ||
+               (type == "post_weight_channel_wise_abs_max");
+    } else {
+      result = op_desc->HasAttr("quantize_weight_bits");
+    }
+    return result;
+  };
 
   Tensor tmp_tensor;
-  CHECK(cpp_program_desc_.BlocksSize());
-  auto* main_block = cpp_program_desc_.GetBlock<cpp::BlockDesc>(0);
-  for (size_t k = 0; k < main_block->OpsSize(); ++k) {
-    auto* op_desc = main_block->GetOp<cpp::OpDesc>(k);
-    if (op_desc->HasAttr("quantize_weight_bits")) {  // weight quantized op
-      auto input_names = op_desc->input_vars();
-      for (auto& input_name : input_names) {
-        std::string input_scale_name = input_name + "_quant_scale";
-        if (op_desc->HasAttr(input_scale_name)) {  // the input is quantized
-          auto input_tensor =
-              scope_->FindVar(input_name)->GetMutable<lite::Tensor>();
-          tmp_tensor.CopyDataFrom(*input_tensor);
-          auto scale_list =
-              op_desc->GetAttr<std::vector<float>>(input_scale_name);
-          int quantize_weight_bits =
-              op_desc->GetAttr<int>("quantize_weight_bits");
-          float* fp_data = input_tensor->mutable_data<float>();
-          std::string op_type = op_desc->Type();
-          if (op_type == "conv2d" || op_type == "depthwise_conv2d") {
-            int64_t h = input_tensor->dims()[0];
-            int64_t w = input_tensor->numel() / h;
-            CHECK_EQ(scale_list.size(), h);
-            if (quantize_weight_bits == 8) {
-              const int8_t* int_data = tmp_tensor.data<int8_t>();
-              PROCESS_CONV2D_DATA()
-            } else {
-              const int16_t* int_data = tmp_tensor.data<int16_t>();
-              PROCESS_CONV2D_DATA()
-            }
-          } else if (op_type == "fc" || op_type == "mul") {
-            if (quantize_weight_bits == 8) {
-              const int8_t* int_data = tmp_tensor.data<int8_t>();
-              PROCESS_FC_DATA()
-            } else {
-              const int16_t* int_data = tmp_tensor.data<int16_t>();
-              PROCESS_FC_DATA()
+  for (size_t i = 0; i < cpp_program_desc_.BlocksSize(); i++) {
+    auto* block = cpp_program_desc_.GetBlock<cpp::BlockDesc>(i);
+    for (size_t k = 0; k < block->OpsSize(); ++k) {
+      auto* op_desc = block->GetOp<cpp::OpDesc>(k);
+      if (is_weight_quantized_op(op_desc)) {
+        auto input_names = op_desc->input_vars();
+        for (auto& input_name : input_names) {
+          std::string input_scale_name = input_name + "_quant_scale";
+          if (op_desc->HasAttr(input_scale_name)) {  // the input is quantized
+            auto input_tensor =
+                scope_->FindVar(input_name)->GetMutable<lite::Tensor>();
+            tmp_tensor.CopyDataFrom(*input_tensor);
+            auto scale_list =
+                op_desc->GetAttr<std::vector<float>>(input_scale_name);
+            int quantize_weight_bits =
+                op_desc->GetAttr<int>("quantize_weight_bits");
+            CHECK(quantize_weight_bits == 8 || quantize_weight_bits == 16);
+            float* fp_data = input_tensor->mutable_data<float>();
+            std::string op_type = op_desc->Type();
+            if (op_type == "conv2d" || op_type == "depthwise_conv2d") {
+              int64_t ch = input_tensor->dims()[0];
+              int64_t offset = input_tensor->numel() / ch;
+              CHECK_EQ(scale_list.size(), ch);
+              if (quantize_weight_bits == 8) {
+                const int8_t* int_data = tmp_tensor.data<int8_t>();
+                PROCESS_CONV2D_DATA()
+              } else {
+                const int16_t* int_data = tmp_tensor.data<int16_t>();
+                PROCESS_CONV2D_DATA()
+              }
+            } else if (op_type == "fc" || op_type == "mul") {
+              int64_t chin = input_tensor->dims()[0];
+              int64_t chout = input_tensor->dims()[1];
+              CHECK_EQ(scale_list.size(), chout);
+              if (quantize_weight_bits == 8) {
+                const int8_t* int_data = tmp_tensor.data<int8_t>();
+                PROCESS_FC_DATA()
+              } else {
+                const int16_t* int_data = tmp_tensor.data<int16_t>();
+                PROCESS_FC_DATA()
+              }
             }
           }
         }
       }
     }
   }
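The PROCESS_FC_DATA rewrite is the substantive fix in this file: the old macro applied one scale (scale_list[0]) to every element of an fc/mul weight, while the new macro treats the weight as [chin, chout] and applies scale_list[j] per output column, matching channel-wise abs_max quantization; PROCESS_CONV2D_DATA keeps its per-output-channel form and only renames h/w to ch/offset. A standalone sketch of the new fc behaviour, using the diff's names (chin, chout, scale_list) but otherwise illustrative rather than Paddle-Lite code:

#include <cstdint>
#include <vector>

// Channel-wise dequantization of an fc/mul weight stored row-major as
// [chin, chout]: each output column j has its own scale, as in the updated
// PROCESS_FC_DATA() macro.
void DequantFcWeight(const int8_t* int_data,
                     const std::vector<float>& scale_list,
                     int64_t chin,
                     int64_t chout,
                     float* fp_data) {
  for (int64_t i = 0; i < chin; ++i) {
    for (int64_t j = 0; j < chout; ++j) {
      fp_data[i * chout + j] = scale_list[j] * int_data[i * chout + j];
    }
  }
}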
lite/core/mir/fusion/conv_bn_fuser.cc

@@ -116,8 +116,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
   }
   size_t weight_num = conv_weight_t->data_size();
   bool enable_int8 = conv_op_desc->HasAttr("enable_int8") ? true : false;
-  bool is_weight_quantization =
-      conv_op_desc->HasAttr("quantize_weight_bits") ? true : false;
+  bool is_weight_quantization = conv_op_desc->HasAttr("quantize_weight_bits");
 
   // comupte BN alpha and beta
   Tensor alpha_tensor, beta_tensor;
lite/core/mir/weight_quantization_preprocess_pass.cc

@@ -22,9 +22,29 @@ namespace paddle {
 namespace lite {
 namespace mir {
 
+bool IsAbsMaxQuantizedOp(const OpInfo& op_info) {
+  bool result = false;
+  if (op_info.HasAttr("quantization_type") &&
+      op_info.GetAttr<std::string>("quantization_type") ==
+          "post_weight_abs_max") {
+    result = true;
+  } else if (!op_info.HasAttr("quantization_type") &&
+             op_info.HasAttr("quantize_weight_bits")) {  // Support older model,
+                                                         // save this for now
+    result = true;
+  }
+  return result;
+}
+
+/*
+ * For abs_max method in WeightQuantization, this pass obtains the scale value
+ * of conv2d, depthwise_conv2d and mul, expands the scale list, and save the
+ * list in the quantized ops.
+ */
 void WeightQuantizationPreprocessPass::Apply(
     const std::unique_ptr<SSAGraph>& graph) {
-  std::vector<std::string> weight_quantized_op = {"conv2d", "depthwise_conv2d"};
+  std::vector<std::string> weight_quantized_op = {
+      "conv2d", "depthwise_conv2d", "mul"};
   for (auto& node : graph->StmtTopologicalOrder()) {
     if (node->IsStmt() &&
         std::find(weight_quantized_op.begin(),
@@ -32,14 +52,20 @@ void WeightQuantizationPreprocessPass::Apply(
                   node->AsStmt().op_type()) != weight_quantized_op.end()) {
       auto* scope = node->stmt()->op()->scope();
       auto* op_desc = node->stmt()->mutable_op_info();
-      if (op_desc->HasAttr("quantize_weight_bits")) {
+      if (IsAbsMaxQuantizedOp(*op_desc)) {
         for (auto& input_name : op_desc->input_vars()) {
           std::string scale_name = input_name + "_quant_scale";
           if (op_desc->HasAttr(scale_name)) {
-            VLOG(5) << "op:" << op_desc->Type() << " input_name:" << input_name;
+            VLOG(0) << " WeightQuantizationPreprocessPass op:"
+                    << op_desc->Type() << " input_name:" << input_name;
             auto input_tensor =
                 scope->FindVar(input_name)->GetMutable<lite::Tensor>();
-            int weight_out_channel = static_cast<int>(input_tensor->dims()[0]);
+            int weight_out_channel;
+            if (op_desc->Type() == "mul") {
+              weight_out_channel = static_cast<int>(input_tensor->dims()[1]);
+            } else {
+              weight_out_channel = static_cast<int>(input_tensor->dims()[0]);
+            }
             auto input_scale = op_desc->GetAttr<std::vector<float>>(scale_name);
             // scale length is equal to weight out channel
             std::vector<float> scale_list(weight_out_channel, input_scale[0]);
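For the per-tensor abs_max method there is only one scale per weight, but the dequantization code expects one scale per output channel, so the pass replicates that single value; the new mul branch exists because a mul weight is laid out as [in, out], so its output-channel count is dims()[1] rather than dims()[0]. A small illustrative sketch of this expansion (standalone, not the pass itself):

#include <cstdint>
#include <string>
#include <vector>

// Expand a single abs_max scale into a per-output-channel scale list,
// mirroring the logic added to WeightQuantizationPreprocessPass.
std::vector<float> ExpandScaleList(const std::string& op_type,
                                   const std::vector<int64_t>& weight_dims,
                                   float single_scale) {
  // "mul" weights are [in, out], so the out-channel count is dims[1];
  // conv2d / depthwise_conv2d weights put the output channel in dims[0].
  const int weight_out_channel = static_cast<int>(
      op_type == "mul" ? weight_dims[1] : weight_dims[0]);
  return std::vector<float>(weight_out_channel, single_scale);
}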
lite/core/mir/weight_quantization_preprocess_pass.h

@@ -25,8 +25,9 @@ namespace mir {
  * If the model is quantized by WeightQuantization in PostTrainingQuantization,
  * the data type of the weight in quantized ops (conv2d, depthwise_conv2d) is
  * int, and the scale is save in the quantized ops.
- * WeightQuantizationPreprocessPass obtains the scale value, expands the
- * scale value to a list, and save the list in the quantized ops.
+ * For abs_max method in WeightQuantization, WeightQuantizationPreprocessPass
+ * obtains the scale value of conv2d, depthwise_conv2d and mul, expands the
+ * scale list, and save the list in the quantized ops.
  */
 class WeightQuantizationPreprocessPass : public ProgramPass {
  public:
lite/tools/benchmark.sh

@@ -2,12 +2,12 @@
 set -e
 
 # Check input
-if [ $# -lt 2 ];
+if [ $# -lt 3 ];
 then
     echo "Input error"
     echo "Usage:"
-    echo "  sh benchmark.sh benchmark_bin_path benchmark_models_path <result_filename> <input_shape> <power_mode: [0|1|2|3]> <is_run_model_optimize: [true|false]> <is_run_quantized_model: [trur|false]>"
-    echo "\npower_mode refer: 0 for big cluster, 1 for little cluster, 2 for all cores, 3 for no bind."
+    echo "  sh benchmark.sh <benchmark_bin_path> <benchmark_models_path> <result_filename>"
+    echo "  sh benchmark.sh <benchmark_bin_path> <benchmark_models_path> <result_filename> <is_run_model_optimize: [true|false]>"
     exit
 fi
@@ -15,10 +15,8 @@ fi
 ANDROID_DIR=/data/local/tmp
 BENCHMARK_BIN=$1
 MODELS_DIR=$2
+RESULT_FILENAME=$3
 
-RESULT_FILENAME=result.txt
-INPUT_SHAPE=1,3,244,244
-POWER_MODE=3
 WARMUP=10
 REPEATS=30
 IS_RUN_MODEL_OPTIMIZE=false
@@ -27,25 +25,9 @@ NUM_THREADS_LIST=(1 2 4)
 MODELS_LIST=$(ls $MODELS_DIR)
 
 # Check input
-if [ $# -gt 2 ];
-then
-    RESULT_FILENAME=$3
-fi
 if [ $# -gt 3 ];
 then
-    INPUT_SHAPE=$4
-fi
-if [ $# -gt 4 ];
-then
-    POWER_MODE=$5
-fi
-if [ $# -gt 5 ];
-then
-    IS_RUN_MODEL_OPTIMIZE=$6
-fi
-if [ $# -gt 6 ];
-then
-    IS_RUN_QUANTIZED_MODEL=$7
+    IS_RUN_MODEL_OPTIMIZE=$4
 fi
@@ -54,26 +36,31 @@ adb shell chmod +x $ANDROID_DIR/benchmark_bin
 adb push $MODELS_DIR $ANDROID_DIR
 
 # Run benchmark
-adb shell "echo 'PaddleLite Benchmark (in ms)\n' > $ANDROID_DIR/$RESULT_FILENAME"
+adb shell "echo 'PaddleLite Benchmark' > $ANDROID_DIR/$RESULT_FILENAME"
 for threads in ${NUM_THREADS_LIST[@]}; do
-    adb shell "echo threads=$threads warmup=$WARMUP repeats=$REPEATS input_shape=$INPUT_SHAPE power_mode=$POWER_MODE >> $ANDROID_DIR/$RESULT_FILENAME"
+    adb shell "echo Threads=$threads Warmup=$WARMUP Repeats=$REPEATS >> $ANDROID_DIR/$RESULT_FILENAME"
     for model_name in ${MODELS_LIST[@]}; do
         echo "Model=$model_name Threads=$threads"
-        adb shell "$ANDROID_DIR/benchmark_bin \
-                   --model_dir=$ANDROID_DIR/${MODELS_DIR}/$model_name \
-                   --input_shape=$INPUT_SHAPE \
-                   --warmup=$WARMUP \
-                   --repeats=$REPEATS \
-                   --threads=$threads \
-                   --power_mode=$POWER_MODE \
-                   --result_filename=$ANDROID_DIR/$RESULT_FILENAME \
-                   --run_model_optimize=$IS_RUN_MODEL_OPTIMIZE \
-                   --is_quantized_model=$IS_RUN_QUANTIZED_MODEL"
+        if [ "$IS_RUN_MODEL_OPTIMIZE" = true ];
+        then
+            adb shell "$ANDROID_DIR/benchmark_bin \
+                       --model_dir=$ANDROID_DIR/${MODELS_DIR}/$model_name \
+                       --warmup=$WARMUP \
+                       --repeats=$REPEATS \
+                       --threads=$threads \
+                       --result_filename=$ANDROID_DIR/$RESULT_FILENAME"
+        else
+            adb shell "$ANDROID_DIR/benchmark_bin \
+                       --optimized_model_path=$ANDROID_DIR/${MODELS_DIR}/$model_name \
+                       --warmup=$WARMUP \
+                       --repeats=$REPEATS \
+                       --threads=$threads \
+                       --result_filename=$ANDROID_DIR/$RESULT_FILENAME"
+        fi
     done
     adb shell "echo >> $ANDROID_DIR/$RESULT_FILENAME"
 done
+adb shell "echo >> $ANDROID_DIR/$RESULT_FILENAME"
+adb shell "echo power_mode refer: 0 for big cluster, 1 for little cluster, 2 for all cores, 3 for no bind >> $ANDROID_DIR/$RESULT_FILENAME"
 
 # Adb pull benchmark result, show result
 adb pull $ANDROID_DIR/$RESULT_FILENAME .
 echo "\n--------------------------------------"