Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2bd0f3c7
P
Paddle
项目概览
PaddlePaddle
/
Paddle
11 个月 前同步成功
通知
2292
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
2bd0f3c7
编写于
12月 07, 2021
作者:
Z
Zuza
提交者:
GitHub
12月 07, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Quantize slice op (#37630)
* quantize slice op * correct test * fix code formatting
上级
c9a3c669
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
450 addition
and
138 deletion
+450
-138
paddle/fluid/framework/ir/graph_pattern_detector.cc
paddle/fluid/framework/ir/graph_pattern_detector.cc
+21
-1
paddle/fluid/framework/ir/graph_pattern_detector.h
paddle/fluid/framework/ir/graph_pattern_detector.h
+14
-0
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+52
-0
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
+1
-0
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+111
-0
paddle/fluid/inference/api/mkldnn_quantizer.cc
paddle/fluid/inference/api/mkldnn_quantizer.cc
+10
-0
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
+3
-0
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+16
-3
paddle/fluid/inference/tests/api/analyzer_ernie_int8_tester.cc
...e/fluid/inference/tests/api/analyzer_ernie_int8_tester.cc
+54
-0
paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
+5
-130
paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
+152
-0
paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
+2
-0
paddle/fluid/operators/slice_op.cc
paddle/fluid/operators/slice_op.cc
+1
-1
python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
...luid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
+7
-2
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
+1
-1
未找到文件。
paddle/fluid/framework/ir/graph_pattern_detector.cc
浏览文件 @
2bd0f3c7
...
...
@@ -1619,6 +1619,26 @@ PDNode *patterns::Reshape::operator()() {
return
reshape_out
;
}
// Builds the slice pattern
//   prev_op -> slice_in -> slice_op -> slice_out -> next_op
// and returns the node that represents the slice output variable.
PDNode *patterns::Slice::operator()() {
  // Any operator may produce the input of the slice.
  auto *producer = pattern->NewNode(prev_op_repr())->assert_is_op();
  auto *slice = pattern->NewNode(slice_op_repr())->assert_is_op("slice");
  // Variable bound to the "Input" slot of the slice op.
  auto *input_var = pattern->NewNode(slice_in_repr())
                        ->AsInput()
                        ->assert_is_op_input("slice", "Input");
  // Variable bound to the "Out" slot of the slice op.
  auto *output_var = pattern->NewNode(slice_out_repr())
                         ->AsOutput()
                         ->assert_is_op_output("slice", "Out");
  // Any operator may consume the output of the slice.
  auto *consumer = pattern->NewNode(next_op_repr())->assert_is_op();

  producer->LinksTo({input_var});
  slice->LinksFrom({input_var}).LinksTo({output_var});
  consumer->LinksFrom({output_var});

  return output_var;
}
PDNode
*
patterns
::
Matmul
::
operator
()()
{
auto
matmul_op
=
pattern
->
NewNode
(
matmul_op_repr
())
->
assert_is_op
(
"matmul"
);
...
...
@@ -2315,7 +2335,7 @@ PDNode *patterns::QuantizePlacement::operator()(
std
::
unordered_set
<
std
::
string
>
({
"concat"
,
"conv2d"
,
"elementwise_add"
,
"fc"
,
"matmul"
,
"pool2d"
,
"prior_box"
,
"reshape2"
,
"transpose2"
,
"fusion_gru"
,
"fusion_lstm"
,
"multi_gru"
});
"fusion_lstm"
,
"multi_gru"
,
"slice"
});
if
(
!
quantize_enabled_op_types
.
empty
())
{
supported_op_types
=
quantize_enabled_op_types
;
}
...
...
paddle/fluid/framework/ir/graph_pattern_detector.h
浏览文件 @
2bd0f3c7
...
...
@@ -980,6 +980,20 @@ struct Reshape : public PatternBase {
PATTERN_DECL_NODE
(
reshape_out
);
PATTERN_DECL_NODE
(
next_op
);
};
// Slice op pattern.
// Declares the nodes of the chain
//   prev_op -> slice_in -> slice_op -> slice_out -> next_op
// operator() builds the pattern and returns the slice_out node, i.e. the
// variable produced by the slice operator.
struct
Slice
:
public
PatternBase
{
Slice
(
PDPattern
*
pattern
,
const
std
::
string
&
name_scope
)
:
PatternBase
(
pattern
,
name_scope
,
"slice"
)
{}
PDNode
*
operator
()();
PATTERN_DECL_NODE
(
prev_op
);
PATTERN_DECL_NODE
(
slice_in
);
PATTERN_DECL_NODE
(
slice_op
);
PATTERN_DECL_NODE
(
slice_out
);
PATTERN_DECL_NODE
(
next_op
);
};
// Matmul op
// Forward pass for matmul.
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
浏览文件 @
2bd0f3c7
...
...
@@ -676,6 +676,57 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
PrettyLogDetail
(
"--- quantized %d reshape ops"
,
quantize_reshape_count
);
}
// Runs the Slice pattern over `graph` and, for every match that qualifies,
// calls QuantizeInput on the "Input" slot and DequantizeOutput on the "Out"
// slot of the slice op. The number of rewritten ops is reported through
// AddStatis and PrettyLogDetail.
void CPUQuantizePass::QuantizeSlice(Graph* graph) const {
  GraphPatternDetector gpd;
  auto* pattern = gpd.mutable_pattern();
  patterns::Slice slice_pattern{pattern, name_scope_};
  slice_pattern();

  int quantize_slice_count = 0;
  auto quantize_matched_slice = [&](
      const GraphPatternDetector::subgraph_t& subgraph, Graph* g) {
    VLOG(4) << "Quantize slice op";
    GET_IR_NODE_FROM_SUBGRAPH(slice_op, slice_op, slice_pattern);

    // Leave the op alone unless it is marked with an INT8 data type.
    if (!platform::HasOpINT8DataType(slice_op->Op())) {
      LogQuantizationDisabled(slice_op);
      return;
    }

    GET_IR_NODE_FROM_SUBGRAPH(prev_op, prev_op, slice_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(next_op, next_op, slice_pattern);

    // Proceed only when at least one neighbour qualifies: the producer passes
    // IsOpDequantized or the consumer passes IsOpQuantized.
    if (!(IsOpDequantized(prev_op) || IsOpQuantized(next_op))) {
      return;
    }

    GET_IR_NODE_FROM_SUBGRAPH(slice_in, slice_in, slice_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(slice_out, slice_out, slice_pattern);

    if (!AreScalesPresentForNodes({slice_out})) {
      LogCannotQuantizeOp(slice_op);
      return;
    }

    // NOTE(review): the input scale is read from slice_out, not slice_in;
    // presumably intentional because slice does not change the value range -
    // confirm.
    bool is_input_unsigned = false;
    auto input_scale = GetScaleValueForNode(slice_out, &is_input_unsigned);
    QuantizeInput(g, slice_op, slice_in, "Input", input_scale,
                  is_input_unsigned);

    bool is_output_unsigned = false;
    auto output_scale = GetScaleValueForNode(slice_out, &is_output_unsigned);
    DequantizeOutput(g, slice_op, slice_out, "Out", output_scale,
                     is_output_unsigned);

    ++quantize_slice_count;
  };

  gpd(graph, quantize_matched_slice);
  AddStatis(quantize_slice_count);

  PrettyLogDetail("--- quantized %d slice ops", quantize_slice_count);
}
void
CPUQuantizePass
::
QuantizeMatmul
(
Graph
*
graph
)
const
{
GraphPatternDetector
gpd
;
auto
pattern
=
gpd
.
mutable_pattern
();
...
...
@@ -1024,6 +1075,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeFusionGru
(
graph
);
QuantizeMultiGru
(
graph
);
QuantizeFusionLSTM
(
graph
);
QuantizeSlice
(
graph
);
}
}
// namespace ir
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
浏览文件 @
2bd0f3c7
...
...
@@ -61,6 +61,7 @@ class CPUQuantizePass : public FusePassBase {
void
QuantizeFusionGru
(
Graph
*
graph
)
const
;
void
QuantizeMultiGru
(
Graph
*
graph
)
const
;
void
QuantizeFusionLSTM
(
Graph
*
graph
)
const
;
void
QuantizeSlice
(
Graph
*
graph
)
const
;
void
QuantizeInput
(
Graph
*
g
,
Node
*
op
,
Node
*
input
,
std
::
string
input_name
,
double
scale_to_one
,
bool
is_input_unsigned
,
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
浏览文件 @
2bd0f3c7
...
...
@@ -55,6 +55,10 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op
->
SetInput
(
"X"
,
{
inputs
[
0
]});
op
->
SetOutput
(
"Out"
,
{
outputs
[
0
]});
op
->
SetAttr
(
"mkldnn_data_type"
,
mkldnn_data_type
);
}
else
if
(
type
==
"slice"
)
{
op
->
SetInput
(
"Input"
,
{
inputs
[
0
]});
op
->
SetOutput
(
"Out"
,
{
outputs
[
0
]});
op
->
SetAttr
(
"mkldnn_data_type"
,
mkldnn_data_type
);
}
else
if
(
type
==
"dropout"
)
{
op
->
SetInput
(
"X"
,
{
inputs
[
0
]});
op
->
SetOutput
(
"Out"
,
{
outputs
[
0
]});
...
...
@@ -784,6 +788,113 @@ TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) {
added_nodes_count
,
2.0
f
*
127
);
}
// Names of the variables that the slice test programs create in block 0
// and hand to PreparePass.
static
const
std
::
initializer_list
<
std
::
string
>
variable_names_slice
=
{
"a"
,
"b"
,
"c"
,
"d"
};
// Builds the program in which the slice follows a dequantize op:
//   a -> Dequantize -> b
//   b -> Slice      -> c
//   c -> Dropout    -> d
ProgramDesc BuildProgramDescSlice() {
  ProgramDesc prog;
  for (const auto& var_name : variable_names_slice) {
    prog.MutableBlock(0)->Var(var_name);
  }

  SetOp(&prog, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
  SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8");
  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");

  return prog;
}
// Builds the program in which the slice sits between two float32 ops:
//   a -> Transpose2 -> b
//   b -> Slice      -> c
//   c -> Dropout    -> d
ProgramDesc BuildProgramDescSliceBetweenNonQuantizedOp() {
  ProgramDesc prog;
  for (const auto& var_name : variable_names_slice) {
    prog.MutableBlock(0)->Var(var_name);
  }

  SetOp(&prog, "transpose2", "Transpose2", {"a"}, {"b"}, true, "float32");
  SetOp(&prog, "slice", "Slice", {"b"}, {"c"}, true, "int8");
  SetOp(&prog, "dropout", "Dropout", {"c"}, {"d"}, true, "float32");

  return prog;
}
// Builds a graph from `prog`, runs PreparePass on it, then walks the graph
// counting transpose2 / slice / quantize / dequantize ops. The counts and the
// number of added nodes are compared with the expected values; every quantize
// op, and every dequantize op not named "Dequantize1", must carry `scale` in
// its "Scale" attribute.
void MainTestSlice(const ProgramDesc& prog, int transpose_count,
                   int slice_count, int quant_count, int dequant_count,
                   int added_nodes_count, float scale) {
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  int original_nodes_num;
  int current_nodes_num;
  PreparePass(&graph, prog, variable_names_slice, &original_nodes_num,
              &current_nodes_num);

  int quantize_nodes_count = 0;
  int dequantize_nodes_count = 0;
  int transpose_nodes_count = 0;
  int slice_nodes_count = 0;

  for (auto* node : graph->Nodes()) {
    if (!node->IsOp()) {
      continue;
    }
    auto* op = node->Op();
    const std::string op_type = op->Type();

    if (op_type == "transpose2") {
      ++transpose_nodes_count;
    } else if (op_type == "slice") {
      ++slice_nodes_count;
    } else if (op_type == "quantize") {
      ++quantize_nodes_count;
      const float quant_scale = BOOST_GET_CONST(float, op->GetAttr("Scale"));
      EXPECT_EQ(quant_scale, scale) << "Scale for node '" + op_type + "'.";
    } else if (op_type == "dequantize") {
      ++dequantize_nodes_count;
      const auto op_name = op->GetAttrIfExists<std::string>("name");
      VLOG(3) << op_name << "\n";
      // "Dequantize1" is the op placed in the program by the builder; only
      // the other dequantize ops are checked against the expected scale.
      if (op_name != "Dequantize1") {
        const float dequant_scale =
            BOOST_GET_CONST(float, op->GetAttr("Scale"));
        EXPECT_EQ(dequant_scale, scale)
            << "Scale for node '" + op_type + "'.";
      }
    }
  }

  EXPECT_EQ(transpose_nodes_count, transpose_count);
  EXPECT_EQ(slice_nodes_count, slice_count);
  EXPECT_EQ(quantize_nodes_count, quant_count);
  EXPECT_EQ(dequantize_nodes_count, dequant_count);
  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
TEST(CpuQuantizePass, slice) {
  // Expected graph after the pass:
  //   a -> Dequantize -> b
  //   b2 -> Quant -> b3 -> slice -> c1 -> Dequant -> c2
  //   c2 -> Dropout -> d
  constexpr int kSliceCount = 1;
  constexpr int kTransposeCount = 0;
  constexpr int kQuantCount = 1;
  constexpr int kDequantCount = 2;
  // 1 Quant + 1 IN + 1 DeQuant + 1 OUT
  constexpr int kAddedNodesCount = 4;

  MainTestSlice(BuildProgramDescSlice(), kTransposeCount, kSliceCount,
                kQuantCount, kDequantCount, kAddedNodesCount, 2.0f * 127);
}
TEST(CpuQuantizePass, sliceBetweenNonQuantizedOp) {
  // Expected graph after the pass (unchanged):
  //   a -> Transpose2 -> b
  //   b -> slice -> c
  //   c -> Dropout -> d
  constexpr int kSliceCount = 1;
  constexpr int kTransposeCount = 1;
  constexpr int kQuantCount = 0;
  constexpr int kDequantCount = 0;
  // 0 Quant + 0 IN + 0 DeQuant + 0 OUT
  constexpr int kAddedNodesCount = 0;

  MainTestSlice(BuildProgramDescSliceBetweenNonQuantizedOp(), kTransposeCount,
                kSliceCount, kQuantCount, kDequantCount, kAddedNodesCount,
                2.0f * 127);
}
static
const
std
::
initializer_list
<
std
::
string
>
variable_names_matmul
=
{
"a"
,
"b"
,
"c"
,
"d"
,
"e"
,
"f"
};
...
...
paddle/fluid/inference/api/mkldnn_quantizer.cc
浏览文件 @
2bd0f3c7
...
...
@@ -134,6 +134,16 @@ void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForOpOutputs(
scales_
[
var_name
]
=
scales_
[
input_var_name
];
}
compute_scale
=
false
;
}
else
if
(
op
->
Type
()
==
"slice"
)
{
auto
input_var_name
=
op
->
Input
(
"Input"
)[
0
];
PADDLE_ENFORCE_NE
(
scales_
.
find
(
input_var_name
),
scales_
.
end
(),
platform
::
errors
::
PreconditionNotMet
(
"Input scales must be calculated before the "
"output scales to infer if output is unsigned."
));
if
(
scales_
.
find
(
input_var_name
)
!=
scales_
.
end
())
{
scales_
[
var_name
]
=
scales_
[
input_var_name
];
}
compute_scale
=
false
;
}
else
if
(
op
->
Type
()
==
"concat"
)
{
// output of ops with unsigned input must be unsigned
is_unsigned
=
true
;
...
...
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
浏览文件 @
2bd0f3c7
...
...
@@ -42,6 +42,9 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
rules_
[
"transpose2"
][
"X"
]
=
ScaleAlgo
::
KL
;
rules_
[
"transpose2"
][
"Out"
]
=
ScaleAlgo
::
NONE
;
rules_
[
"slice"
][
"Input"
]
=
ScaleAlgo
::
KL
;
rules_
[
"slice"
][
"Out"
]
=
ScaleAlgo
::
NONE
;
rules_
[
"fc"
][
"Input"
]
=
ScaleAlgo
::
KL
;
rules_
[
"fc"
][
"W"
]
=
ScaleAlgo
::
MAX_CH_T
;
rules_
[
"fc"
][
"Bias"
]
=
ScaleAlgo
::
NONE
;
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
2bd0f3c7
...
...
@@ -94,6 +94,17 @@ function(inference_analysis_api_test target install_dir filename)
ARGS --infer_model=
${
install_dir
}
/model --infer_data=
${
install_dir
}
/data.txt --refer_result=
${
install_dir
}
/result.txt
)
endfunction
()
# Registers an INT8 analysis test through inference_analysis_test.
#   target      - name of the test target
#   install_dir - directory that holds model, data.txt and result.txt
#   filename    - test source file
# The test is always run with --accuracy=0.8 --batch_size=5 --enable_int8=true.
function
(
inference_analysis_api_int8_test target install_dir filename
)
inference_analysis_test
(
${
target
}
SRCS
${
filename
}
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
ARGS --infer_model=
${
install_dir
}
/model
--infer_data=
${
install_dir
}
/data.txt
--refer_result=
${
install_dir
}
/result.txt
--accuracy=0.8
--batch_size=5
--enable_int8=true
)
endfunction
()
function
(
inference_multiple_models_analysis_api_test target install_dir filename
)
inference_analysis_test
(
${
target
}
SRCS
${
filename
}
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
...
...
@@ -284,13 +295,14 @@ set(PYRAMID_DNN_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/pyramid_dnn")
download_model_and_data_without_verify
(
${
PYRAMID_DNN_INSTALL_DIR
}
"PyramidDNN_model.tar.gz"
"PyramidDNN_data.txt.tar.gz"
)
inference_analysis_api_test
(
test_analyzer_pyramid_dnn
${
PYRAMID_DNN_INSTALL_DIR
}
analyzer_pyramid_dnn_tester.cc
)
#Ernie
#
Ernie
set
(
ERNIE_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/Ernie"
)
download_model_and_data
(
${
ERNIE_INSTALL_DIR
}
"Ernie_model.tar.gz"
aa59192dd41ed377f9f168e3a1309fa6
"Ernie_data.txt.tar.gz"
5396e63548edad7ca561e7e26a9476d1
)
download_result
(
${
ERNIE_INSTALL_DIR
}
"Ernie_result.txt.tar.gz"
73beea65abda2edb61c1662cd3180c62
)
inference_analysis_api_test
(
test_analyzer_ernie
${
ERNIE_INSTALL_DIR
}
analyzer_ernie_tester.cc
)
inference_analysis_api_int8_test
(
test_analyzer_ernie_int8
${
ERNIE_INSTALL_DIR
}
analyzer_ernie_int8_tester.cc
)
#Ernie large
#
Ernie large
set
(
ERNIE_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/Ernie_Large"
)
download_model_and_data
(
${
ERNIE_INSTALL_DIR
}
"Ernie_large_model.tar.gz"
af7715245ed32cc77374625d4c80f7ef
"Ernie_large_data.txt.tar.gz"
edb2113eec93783cad56ed76d47ba57f
)
download_result
(
${
ERNIE_INSTALL_DIR
}
"Ernie_large_result.txt.tar.gz"
1facda98eef1085dc9d435ebf3f23a73
)
...
...
@@ -426,7 +438,7 @@ if(WITH_MKLDNN)
# TODO(grygielski) Enable after MKL-DNN 1.0 merge
set
(
INT8_VGG16_MODEL_DIR
"
${
INT8_DATA_DIR
}
/vgg16"
)
download_int8_data_without_verify
(
${
INT8_VGG16_MODEL_DIR
}
"VGG16_int8_model.tar.gz"
)
#
inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
# inference_analysis_api_int8_test_run(test_analyzer_int8_vgg16 ${INT8_IMG_CLASS_TEST_APP} ${INT8_VGG16_MODEL_DIR} ${IMAGENET_DATA_PATH})
# vgg19 int8
# TODO(grygielski) Enable after MKL-DNN 1.0 merge
...
...
@@ -730,6 +742,7 @@ set_tests_properties(test_analyzer_mobilenet_transpose PROPERTIES TIMEOUT 120)
set_tests_properties
(
test_analyzer_resnet50 PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_ner PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_ernie PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_ernie_int8 PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_googlenet PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_small_dam PROPERTIES TIMEOUT 120
)
set_tests_properties
(
test_analyzer_transformer PROPERTIES TIMEOUT 120
)
...
...
paddle/fluid/inference/tests/api/analyzer_ernie_int8_tester.cc
0 → 100644
浏览文件 @
2bd0f3c7
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tests/api/analyzer_ernie_tester.h"
namespace
paddle
{
namespace
inference
{
using
paddle
::
PaddleTensor
;
#ifdef PADDLE_WITH_MKLDNN
// Configures `cfg` for the MKLDNN INT8 run: sets the model path from
// FLAGS_infer_model, enables MKLDNN and the MKLDNN quantizer, and registers
// `data` as quantizer warmup data with a warmup batch size of
// FLAGS_batch_size.
//
// `data` is taken by value, so it is already a private copy; it is moved into
// the shared warmup vector instead of being deep-copied a second time.
void SetInt8Config(AnalysisConfig* cfg,
                   std::vector<paddle::PaddleTensor> data) {
  cfg->SetModel(FLAGS_infer_model);
  cfg->EnableMKLDNN();
  cfg->EnableMkldnnQuantizer();

  auto warmup_data =
      std::make_shared<std::vector<PaddleTensor>>(std::move(data));
  cfg->mkldnn_quantizer_config()->SetWarmupData(warmup_data);
  cfg->mkldnn_quantizer_config()->SetWarmupBatchSize(FLAGS_batch_size);

  cfg->SwitchSpecifyInputNames();
  cfg->SwitchIrOptim();
  cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
}
// Compare result of NativeConfig and AnalysisConfig
void
compare_int8
(
bool
use_mkldnn
=
false
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
;
LoadInputData
(
&
inputs
);
AnalysisConfig
cfg
;
SetInt8Config
(
&
cfg
,
inputs
[
0
]);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
inputs
);
}
TEST
(
Analyzer_ernie
,
compare_int8_mkldnn
)
{
compare_int8
(
true
/* use_mkldnn */
);
}
#endif
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/tests/api/analyzer_ernie_tester.cc
浏览文件 @
2bd0f3c7
...
...
@@ -12,142 +12,16 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/tests/api/
tester_help
er.h"
#include "paddle/fluid/inference/tests/api/
analyzer_ernie_test
er.h"
namespace
paddle
{
namespace
inference
{
using
paddle
::
PaddleTensor
;
template
<
typename
T
>
void
GetValueFromStream
(
std
::
stringstream
*
ss
,
T
*
t
)
{
(
*
ss
)
>>
(
*
t
);
}
template
<
>
void
GetValueFromStream
<
std
::
string
>
(
std
::
stringstream
*
ss
,
std
::
string
*
t
)
{
*
t
=
ss
->
str
();
}
// Split string to vector
template
<
typename
T
>
void
Split
(
const
std
::
string
&
line
,
char
sep
,
std
::
vector
<
T
>
*
v
)
{
std
::
stringstream
ss
;
T
t
;
for
(
auto
c
:
line
)
{
if
(
c
!=
sep
)
{
ss
<<
c
;
}
else
{
GetValueFromStream
<
T
>
(
&
ss
,
&
t
);
v
->
push_back
(
std
::
move
(
t
));
ss
.
str
({});
ss
.
clear
();
}
}
if
(
!
ss
.
str
().
empty
())
{
GetValueFromStream
<
T
>
(
&
ss
,
&
t
);
v
->
push_back
(
std
::
move
(
t
));
ss
.
str
({});
ss
.
clear
();
}
}
// Parse tensor from string
template
<
typename
T
>
bool
ParseTensor
(
const
std
::
string
&
field
,
paddle
::
PaddleTensor
*
tensor
)
{
std
::
vector
<
std
::
string
>
data
;
Split
(
field
,
':'
,
&
data
);
if
(
data
.
size
()
<
2
)
return
false
;
std
::
string
shape_str
=
data
[
0
];
std
::
vector
<
int
>
shape
;
Split
(
shape_str
,
' '
,
&
shape
);
std
::
string
mat_str
=
data
[
1
];
std
::
vector
<
T
>
mat
;
Split
(
mat_str
,
' '
,
&
mat
);
tensor
->
shape
=
shape
;
auto
size
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
std
::
multiplies
<
int
>
())
*
sizeof
(
T
);
tensor
->
data
.
Resize
(
size
);
std
::
copy
(
mat
.
begin
(),
mat
.
end
(),
static_cast
<
T
*>
(
tensor
->
data
.
data
()));
tensor
->
dtype
=
GetPaddleDType
<
T
>
();
return
true
;
}
// Parse input tensors from string
bool
ParseLine
(
const
std
::
string
&
line
,
std
::
vector
<
paddle
::
PaddleTensor
>
*
tensors
)
{
std
::
vector
<
std
::
string
>
fields
;
Split
(
line
,
';'
,
&
fields
);
tensors
->
clear
();
tensors
->
reserve
(
4
);
int
i
=
0
;
auto
input_name
=
FLAGS_ernie_large
?
"eval_placeholder_"
:
"placeholder_"
;
for
(;
i
<
3
;
i
++
)
{
paddle
::
PaddleTensor
temp
;
ParseTensor
<
int64_t
>
(
fields
[
i
],
&
temp
);
temp
.
name
=
input_name
+
std
::
to_string
(
i
);
tensors
->
push_back
(
temp
);
}
// input_mask
paddle
::
PaddleTensor
input_mask
;
ParseTensor
<
float
>
(
fields
[
i
],
&
input_mask
);
input_mask
.
name
=
input_name
+
std
::
to_string
(
i
);
tensors
->
push_back
(
input_mask
);
return
true
;
}
bool
LoadInputData
(
std
::
vector
<
std
::
vector
<
paddle
::
PaddleTensor
>>
*
inputs
)
{
if
(
FLAGS_infer_data
.
empty
())
{
LOG
(
ERROR
)
<<
"please set input data path"
;
return
false
;
}
std
::
ifstream
fin
(
FLAGS_infer_data
);
std
::
string
line
;
int
sample
=
0
;
// The unit-test dataset only have 10 samples, each sample have 5 feeds.
while
(
std
::
getline
(
fin
,
line
))
{
std
::
vector
<
paddle
::
PaddleTensor
>
feed_data
;
ParseLine
(
line
,
&
feed_data
);
inputs
->
push_back
(
std
::
move
(
feed_data
));
sample
++
;
if
(
!
FLAGS_test_all_data
&&
sample
==
FLAGS_batch_size
)
break
;
}
LOG
(
INFO
)
<<
"number of samples: "
<<
sample
;
return
true
;
}
void
SetConfig
(
AnalysisConfig
*
cfg
,
bool
use_mkldnn
=
false
,
bool
use_gpu
=
false
)
{
cfg
->
SetModel
(
FLAGS_infer_model
);
if
(
use_mkldnn
)
{
cfg
->
EnableMKLDNN
();
}
if
(
use_gpu
)
{
cfg
->
EnableUseGpu
(
100
,
0
);
}
else
{
cfg
->
DisableGpu
();
}
cfg
->
SwitchSpecifyInputNames
();
cfg
->
SwitchIrOptim
();
cfg
->
SetCpuMathLibraryNumThreads
(
FLAGS_cpu_num_threads
);
}
void
profile
(
bool
use_mkldnn
=
false
,
bool
use_gpu
=
false
)
{
AnalysisConfig
config
;
SetConfig
(
&
config
,
use_mkldnn
,
use_gpu
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
outputs
;
...
...
@@ -189,11 +63,12 @@ TEST(Analyzer_Ernie, fuse_statis) {
// Compare result of NativeConfig and AnalysisConfig
void
compare
(
bool
use_mkldnn
=
false
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
;
LoadInputData
(
&
inputs
);
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
,
use_mkldnn
,
false
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
;
LoadInputData
(
&
inputs
);
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
inputs
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_ernie_tester.h
0 → 100644
浏览文件 @
2bd0f3c7
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/inference/tests/api/tester_helper.h"
namespace
paddle
{
namespace
inference
{
using
paddle
::
PaddleTensor
;
// Extracts a single value of type T from the stream with operator>>.
template <typename T>
void GetValueFromStream(std::stringstream* ss, T* t) {
  (*ss) >> (*t);
}

// std::string specialization: takes the whole buffered text, embedded
// whitespace included, instead of a single whitespace-delimited word.
// A full specialization is an ordinary function, so it is marked `inline`
// to keep this header safe to include from more than one translation unit.
template <>
inline void GetValueFromStream<std::string>(std::stringstream* ss,
                                            std::string* t) {
  *t = ss->str();
}

// Split string to vector.
//
// Splits `line` at every occurrence of `sep`, converts each piece to T with
// GetValueFromStream and appends it to `*v`. A trailing piece is appended
// only when it is non-empty, so a trailing separator adds no element.
// A piece that cannot be converted (for example the empty piece between two
// adjacent separators when T is numeric) is appended as a value-initialized
// T instead of an indeterminate or stale value.
template <typename T>
void Split(const std::string& line, char sep, std::vector<T>* v) {
  std::stringstream ss;

  // Converts the buffered piece, appends it and resets the stream (content
  // and error flags) for the next piece.
  const auto flush_piece = [&ss, v]() {
    T value{};
    GetValueFromStream<T>(&ss, &value);
    v->push_back(std::move(value));
    ss.str({});
    ss.clear();
  };

  for (const char c : line) {
    if (c == sep) {
      flush_piece();
    } else {
      ss << c;
    }
  }
  if (!ss.str().empty()) {
    flush_piece();
  }
}
// Parse tensor from string.
//
// `field` has the form "<shape>:<values>", where both parts are
// space-separated lists: the shape is parsed as ints and the values as T.
// On success the shape, data buffer and dtype of `tensor` are filled in and
// true is returned. Returns false, leaving `tensor` untouched, when the field
// has no ':' separated value part, when the shape yields a negative element
// count, or when there are more values than the shape can hold (copying them
// would write past the end of the buffer).
template <typename T>
bool ParseTensor(const std::string& field, paddle::PaddleTensor* tensor) {
  std::vector<std::string> data;
  Split(field, ':', &data);
  if (data.size() < 2) return false;

  std::vector<int> shape;
  Split(data[0], ' ', &shape);

  std::vector<T> mat;
  Split(data[1], ' ', &mat);

  const int numel =
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>());
  if (numel < 0 || mat.size() > static_cast<size_t>(numel)) return false;

  tensor->shape = shape;
  // The buffer is sized from the shape, in bytes.
  tensor->data.Resize(static_cast<size_t>(numel) * sizeof(T));
  std::copy(mat.begin(), mat.end(), static_cast<T*>(tensor->data.data()));
  tensor->dtype = GetPaddleDType<T>();
  return true;
}
// Parse input tensors from string
bool
ParseLine
(
const
std
::
string
&
line
,
std
::
vector
<
paddle
::
PaddleTensor
>
*
tensors
)
{
std
::
vector
<
std
::
string
>
fields
;
Split
(
line
,
';'
,
&
fields
);
tensors
->
clear
();
tensors
->
reserve
(
4
);
int
i
=
0
;
auto
input_name
=
FLAGS_ernie_large
?
"eval_placeholder_"
:
"placeholder_"
;
for
(;
i
<
3
;
i
++
)
{
paddle
::
PaddleTensor
temp
;
ParseTensor
<
int64_t
>
(
fields
[
i
],
&
temp
);
temp
.
name
=
input_name
+
std
::
to_string
(
i
);
tensors
->
push_back
(
temp
);
}
// input_mask
paddle
::
PaddleTensor
input_mask
;
ParseTensor
<
float
>
(
fields
[
i
],
&
input_mask
);
input_mask
.
name
=
input_name
+
std
::
to_string
(
i
);
tensors
->
push_back
(
input_mask
);
return
true
;
}
// Reads samples from the file named by FLAGS_infer_data, one sample per line,
// and appends the parsed tensors of each sample to `*inputs`.
//
// Reading stops after FLAGS_batch_size samples unless FLAGS_test_all_data is
// set. Blank lines and lines that ParseLine rejects are skipped.
// Returns false when no data path is set or the file cannot be opened,
// true otherwise.
//
// `inline` because the function is defined in a header.
inline bool LoadInputData(
    std::vector<std::vector<paddle::PaddleTensor>>* inputs) {
  if (FLAGS_infer_data.empty()) {
    LOG(ERROR) << "please set input data path";
    return false;
  }

  std::ifstream fin(FLAGS_infer_data);
  if (!fin.is_open()) {
    LOG(ERROR) << "failed to open input data file: " << FLAGS_infer_data;
    return false;
  }

  std::string line;
  int sample = 0;

  // Per the original note, the unit-test dataset has only 10 samples with
  // 5 feeds each. NOTE(review): ParseLine builds 4 tensors per line - confirm
  // the feed count.
  while (std::getline(fin, line)) {
    if (line.empty()) continue;

    std::vector<paddle::PaddleTensor> feed_data;
    if (!ParseLine(line, &feed_data)) {
      LOG(WARNING) << "skipping malformed input line";
      continue;
    }
    inputs->push_back(std::move(feed_data));
    sample++;
    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
  }
  LOG(INFO) << "number of samples: " << sample;
  return true;
}
// Configures `cfg` for the Ernie tests: sets the model path from
// FLAGS_infer_model, optionally enables MKLDNN, enables the GPU
// (EnableUseGpu(100, 0)) or explicitly disables it, and sets the CPU math
// library thread count from FLAGS_cpu_num_threads.
//
// `inline` because the function is defined in a header and would otherwise
// be defined once per including translation unit.
inline void SetConfig(AnalysisConfig* cfg, bool use_mkldnn = false,
                      bool use_gpu = false) {
  cfg->SetModel(FLAGS_infer_model);
  if (use_mkldnn) {
    cfg->EnableMKLDNN();
  }
  if (use_gpu) {
    cfg->EnableUseGpu(100, 0);
  } else {
    cfg->DisableGpu();
  }
  cfg->SwitchSpecifyInputNames();
  cfg->SwitchIrOptim();
  cfg->SetCpuMathLibraryNumThreads(FLAGS_cpu_num_threads);
}
}
// namespace inference
}
// namespace paddle
paddle/fluid/operators/mkldnn/slice_mkldnn_op.cc
浏览文件 @
2bd0f3c7
...
...
@@ -227,6 +227,8 @@ class SliceGradMKLDNNKernel : public framework::OpKernel<T> {
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_KERNEL
(
slice
,
MKLDNN
,
paddle
::
platform
::
CPUPlace
,
ops
::
SliceMKLDNNKernel
<
float
>
,
ops
::
SliceMKLDNNKernel
<
int8_t
>
,
ops
::
SliceMKLDNNKernel
<
uint8_t
>
,
ops
::
SliceMKLDNNKernel
<
paddle
::
platform
::
bfloat16
>
);
namespace
ops
=
paddle
::
operators
;
...
...
paddle/fluid/operators/slice_op.cc
浏览文件 @
2bd0f3c7
...
...
@@ -244,7 +244,7 @@ class SliceOpMaker : public framework::OpProtoAndCheckerMaker {
"mkldnn_data_type"
,
"(string, default
\"
float32
\"
). Data type of mkldnn kernel"
)
.
SetDefault
(
"float32"
)
.
InEnum
({
"float32"
,
"bfloat16"
})
.
InEnum
({
"float32"
,
"
int8"
,
"
bfloat16"
})
.
AsExtra
();
AddComment
(
R"DOC(
Slice Operator.
...
...
python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
浏览文件 @
2bd0f3c7
...
...
@@ -62,7 +62,9 @@ class Quant2Int8MkldnnPass(object):
self
.
_ops_to_quantize
=
_ops_to_quantize
self
.
_op_ids_to_skip
=
_op_ids_to_skip
if
_op_ids_to_skip
is
not
None
else
set
(
[
-
1
])
self
.
_scale_immutable_ops
=
[
'transpose2'
,
'reshape2'
,
'pool2d'
]
self
.
_scale_immutable_ops
=
[
'transpose2'
,
'reshape2'
,
'pool2d'
,
'slice'
]
self
.
_scale_ops
=
[
'scale'
]
self
.
_conv_ops
=
[
'conv2d'
,
'depthwise_conv2d'
]
self
.
_pool_ops
=
[
'pool2d'
]
...
...
@@ -241,7 +243,10 @@ class Quant2Int8MkldnnPass(object):
waiting_for_scale
=
set
()
for
op
in
graph
.
all_op_nodes
():
if
op
.
name
()
in
self
.
_scale_immutable_ops
:
input_name
=
op
.
input
(
"X"
)[
0
]
if
op
.
name
()
==
'slice'
:
input_name
=
op
.
input
(
"Input"
)[
0
]
else
:
input_name
=
op
.
input
(
"X"
)[
0
]
output_name
=
op
.
output
(
"Out"
)[
0
]
tensor_names
=
[
input_name
,
output_name
]
...
...
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt
浏览文件 @
2bd0f3c7
...
...
@@ -253,7 +253,7 @@ if(LINUX AND WITH_MKLDNN)
set
(
FP32_ERNIE_MODEL_ARCHIVE
"ernie_fp32_model.tar.gz"
)
set
(
FP32_ERNIE_MODEL_DIR
"
${
QUANT_INSTALL_DIR
}
/Ernie_float"
)
download_quant_fp32_model
(
${
FP32_ERNIE_MODEL_DIR
}
${
FP32_ERNIE_MODEL_ARCHIVE
}
114f38804a3ef8c45e7259e68bbd838b
)
set
(
QUANT2_ERNIE_OPS_TO_QUANTIZE
"fc,reshape2,transpose2,matmul,elementwise_add"
)
set
(
QUANT2_ERNIE_OPS_TO_QUANTIZE
"fc,reshape2,transpose2,matmul,elementwise_add
,slice
"
)
inference_quant2_int8_nlp_test
(
test_quant2_int8_ernie_mkldnn
${
QUANT2_ERNIE_MODEL_DIR
}
/Ernie_qat/float
${
FP32_ERNIE_MODEL_DIR
}
/ernie_fp32_model
${
NLP_DATA_PATH
}
${
NLP_LABLES_PATH
}
${
QUANT2_ERNIE_OPS_TO_QUANTIZE
}
)
# Quant2 GRU
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录