Commit 4fd4095d (unverified)
Authored by Wojciech Uss on Nov 30, 2020; committed via GitHub on Nov 30, 2020

Add quantization of multi_gru op and tests (#28615)

Parent: 4adddcc8
Showing 7 changed files with 334 additions and 63 deletions (+334, −63):
- paddle/fluid/framework/ir/graph_pattern_detector.cc: +14, −0
- paddle/fluid/framework/ir/graph_pattern_detector.h: +15, −0
- paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc: +131, −27
- paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h: +8, −2
- paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc: +125, −3
- python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py: +40, −30
- python/paddle/fluid/contrib/slim/tests/CMakeLists.txt: +1, −1
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -2645,6 +2645,20 @@ PDNode *patterns::MultiGruSeq::operator()() {
   return h2;
 }
 
+PDNode *patterns::MultiGru::operator()() {
+  auto x = pattern->NewNode(x_repr())->AsInput()->assert_is_op_input(
+      "multi_gru", "X");
+  auto gru = pattern->NewNode(gru_repr())->assert_is_op("multi_gru");
+  auto wx = pattern->NewNode(wx_repr())->AsInput()->assert_is_op_nth_input(
+      "multi_gru", "WeightX", 0);
+  auto wh = pattern->NewNode(wh_repr())->AsInput()->assert_is_op_nth_input(
+      "multi_gru", "WeightH", 0);
+  auto h = pattern->NewNode(h_repr())->AsOutput()->assert_is_op_output(
+      "multi_gru", "Hidden");
+  gru->LinksFrom({x, wx, wh}).LinksTo({h});
+  return h;
+}
+
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h

@@ -1490,6 +1490,21 @@ struct MultiGruSeq : public PatternBase {
   PATTERN_DECL_NODE(h2);
 };
 
+// multi_gru op
+// Quantization pass for multi_gru op.
+// Hidden of the multi_gru op is a result of the operator().
+struct MultiGru : public PatternBase {
+  MultiGru(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "multi_gru") {}
+  PDNode* operator()();
+
+  PATTERN_DECL_NODE(x);
+  PATTERN_DECL_NODE(gru);
+  PATTERN_DECL_NODE(wx);
+  PATTERN_DECL_NODE(wh);
+  PATTERN_DECL_NODE(h);
+};
+
 }  // namespace patterns
 
 // Link two ir::Nodes from each other.
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc

@@ -26,6 +26,8 @@ namespace framework {
 namespace ir {
 
 using EigenVectorArrayMap = Eigen::Map<Eigen::Array<double, Eigen::Dynamic, 1>>;
+using EigenVectorArrayMapFloat =
+    Eigen::Map<Eigen::Array<float, Eigen::Dynamic, 1>>;
 using string::PrettyLogDetail;
 
 namespace {

@@ -45,9 +47,12 @@ void LogCannotQuantizeOp(Node* op, const char* details = nullptr) {
   PrettyLogDetail(msg_ss.str().c_str());
 }
 
-void LogScaleIsMissingForVar(Node* var) {
-  VLOG(4) << "Quantization scale for the variable " << var->Name()
-          << " is missing.";
+void LogScaleIsMissingForVarName(const std::string& name) {
+  VLOG(4) << "Quantization scale for the variable " << name << " is missing.";
+}
+
+void LogScaleIsMissingForVarNode(Node* node) {
+  LogScaleIsMissingForVarName(node->Name());
 }
 
 void LogQuantizationDisabled(Node* op) {

@@ -202,23 +207,45 @@ void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
   if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
 }
 
+bool CPUQuantizePass::AreScalesPresentForVarNames(
+    std::vector<std::string> names) const {
+  auto& scales = Get<VarQuantScale>("quant_var_scales");
+  bool present = true;
+  for (auto name : names) {
+    if (scales.find(name) == scales.end()) {
+      present = false;
+      LogScaleIsMissingForVarName(name);
+    }
+  }
+  return present;
+}
+
 bool CPUQuantizePass::AreScalesPresentForNodes(
-    const Node* op_node, std::initializer_list<Node*> nodes) const {
+    std::initializer_list<Node*> nodes) const {
   auto& scales = Get<VarQuantScale>("quant_var_scales");
   bool present = true;
   for (auto node : nodes) {
     if (scales.count(node->Name()) == 0) {
       present = false;
-      LogScaleIsMissingForVar(node);
+      LogScaleIsMissingForVarNode(node);
     }
   }
   return present;
 }
 
+std::pair<bool, LoDTensor> CPUQuantizePass::GetScaleDataByName(
+    const std::string& name) const {
+  auto& scales = Get<VarQuantScale>("quant_var_scales");
+  return scales.at(name);
+}
+
 std::pair<bool, LoDTensor> CPUQuantizePass::GetScaleDataForNode(
     const Node* node) const {
-  auto& scales = Get<VarQuantScale>("quant_var_scales");
-  return scales[node->Name()];
+  return GetScaleDataByName(node->Name());
+}
+
+LoDTensor CPUQuantizePass::GetScaleTensorByName(const std::string& name) const {
+  return GetScaleDataByName(name).second;
 }
 
 LoDTensor CPUQuantizePass::GetScaleTensorForNode(const Node* node) const {

@@ -265,7 +292,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
     GET_IR_NODE_FROM_SUBGRAPH(conv_input, conv_input, conv_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(conv_output, conv_output, conv_pattern);
 
-    auto has_output_scale = AreScalesPresentForNodes(conv_op, {conv_output});
+    auto has_output_scale = AreScalesPresentForNodes({conv_output});
     if (with_residual_data && !has_output_scale) {
       LogCannotQuantizeOp(conv_op,
                           "Conv op with ResidualData input cannot be quantized "

@@ -277,7 +304,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
       GET_IR_NODE_FROM_SUBGRAPH(conv_residual_data, conv_residual_data,
                                 conv_pattern);
       if (!AreScalesPresentForNodes(
-              conv_op, {conv_input, conv_filter, conv_residual_data})) {
+              {conv_input, conv_filter, conv_residual_data})) {
         LogCannotQuantizeOp(conv_op);
         return;
       }

@@ -289,7 +316,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
       QuantizeInput(g, conv_op, conv_residual_data, "ResidualData",
                     residual_scale, is_residual_unsigned, "Scale_in_eltwise");
     } else {
-      if (!AreScalesPresentForNodes(conv_op, {conv_input, conv_filter})) {
+      if (!AreScalesPresentForNodes({conv_input, conv_filter})) {
         LogCannotQuantizeOp(conv_op);
         return;
       }

@@ -302,7 +329,7 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
     auto filter_scale_tensor = GetScaleTensorForNode(conv_filter);
     EigenVectorArrayMap eigen_tensor{filter_scale_tensor.data<double>(),
-                                     filter_scale_tensor.numel(), 1};
+                                     filter_scale_tensor.numel()};
     eigen_tensor *= static_cast<double>(S8_MAX);
     std::vector<float> filter_scale{filter_scale_tensor.data<double>(),

@@ -372,7 +399,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(input, input, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(output, output, fc_pattern);
 
-    if (!AreScalesPresentForNodes(fc, {input, weights})) {
+    if (!AreScalesPresentForNodes({input, weights})) {
       LogCannotQuantizeOp(fc);
       return;
     }

@@ -384,7 +411,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
     auto weight_scale_tensor = GetScaleTensorForNode(weights);
     EigenVectorArrayMap eigen_tensor{weight_scale_tensor.data<double>(),
-                                     weight_scale_tensor.numel(), 1};
+                                     weight_scale_tensor.numel()};
     eigen_tensor *= static_cast<double>(S8_MAX);
     std::vector<float> filter_scale{weight_scale_tensor.data<double>(),

@@ -393,7 +420,7 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
     fc->Op()->SetAttr("Scale_weights", filter_scale);
 
     // if quantization scale is missing for output tensor, return fp32 data
-    if (AreScalesPresentForNodes(fc, {output})) {
+    if (AreScalesPresentForNodes({output})) {
       bool is_output_unsigned{false};
       auto output_scale = GetScaleValueForNode(output, &is_output_unsigned);
       DequantizeOutput(g, fc, output, "Out", output_scale, is_output_unsigned,

@@ -434,7 +461,7 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(pool_input, pool_input, pool_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(pool_output, pool_output, pool_pattern);
 
-    if (!AreScalesPresentForNodes(pool_op, {pool_input, pool_output})) {
+    if (!AreScalesPresentForNodes({pool_input, pool_output})) {
       LogCannotQuantizeOp(pool_op);
       return;
     }

@@ -477,7 +504,7 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(concat_out, concat_out, concat_pattern);
 
-    if (!AreScalesPresentForNodes(concat_op, {concat_out})) {
+    if (!AreScalesPresentForNodes({concat_out})) {
       LogCannotQuantizeOp(concat_op);
       return;
     }

@@ -523,7 +550,7 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(prior_box_input, prior_box_input,
                               prior_box_pattern);
 
-    if (!AreScalesPresentForNodes(prior_box_op, {prior_box_input})) {
+    if (!AreScalesPresentForNodes({prior_box_input})) {
       LogCannotQuantizeOp(prior_box_op);
       return;
     }

@@ -571,8 +598,7 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(transpose_in, transpose_in, transpose_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(transpose_out, transpose_out, transpose_pattern);
 
-    if (!AreScalesPresentForNodes(transpose_op,
-                                  {transpose_in, transpose_out})) {
+    if (!AreScalesPresentForNodes({transpose_in, transpose_out})) {
       LogCannotQuantizeOp(transpose_op);
       return;
     }

@@ -626,7 +652,7 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(reshape_in, reshape_in, reshape_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(reshape_out, reshape_out, reshape_pattern);
 
-    if (!AreScalesPresentForNodes(reshape_op, {reshape_in, reshape_out})) {
+    if (!AreScalesPresentForNodes({reshape_in, reshape_out})) {
      LogCannotQuantizeOp(reshape_op);
       return;
     }

@@ -678,7 +704,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(matmul_in_y, matmul_in_y, matmul_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(matmul_out, matmul_out, matmul_pattern);
 
-    if (!AreScalesPresentForNodes(matmul_op, {matmul_in_x, matmul_in_y})) {
+    if (!AreScalesPresentForNodes({matmul_in_x, matmul_in_y})) {
       LogCannotQuantizeOp(matmul_op);
       return;
     }

@@ -698,7 +724,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
                   "Scale_y");
 
     // if quantization scale is missing for output tensor, return fp32 data
-    if (AreScalesPresentForNodes(matmul_op, {matmul_out})) {
+    if (AreScalesPresentForNodes({matmul_out})) {
       bool is_output_unsigned{false};
       auto output_scale = GetScaleValueForNode(matmul_out, &is_output_unsigned);
       DequantizeOutput(g, matmul_op, matmul_out, "Out", output_scale,

@@ -744,8 +770,7 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
                               elementwise_add_pattern);
 
-    if (!AreScalesPresentForNodes(elementwise_add_op,
-                                  {elementwise_add_x, elementwise_add_y})) {
+    if (!AreScalesPresentForNodes({elementwise_add_x, elementwise_add_y})) {
       LogCannotQuantizeOp(elementwise_add_op);
       return;
     }

@@ -769,7 +794,7 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
                   is_y_unsigned, "Scale_y");
 
     // if quantization scale is missing for output tensor, return fp32 data
-    if (AreScalesPresentForNodes(elementwise_add_op, {elementwise_add_out})) {
+    if (AreScalesPresentForNodes({elementwise_add_out})) {
       bool is_output_unsigned{false};
       auto output_scale =
           GetScaleValueForNode(elementwise_add_out, &is_output_unsigned);

@@ -810,7 +835,7 @@ void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const {
     GET_IR_NODE_FROM_SUBGRAPH(weight_x, weight_x, pattern);
     GET_IR_NODE_FROM_SUBGRAPH(out, out, pattern);
 
-    if (!AreScalesPresentForNodes(op, {x, weight_h, weight_x})) {
+    if (!AreScalesPresentForNodes({x, weight_h, weight_x})) {
       LogCannotQuantizeOp(op);
       return;
     }

@@ -826,7 +851,7 @@ void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const {
     auto weight_scale_tensor = GetScaleTensorForNode(weight_x);
     EigenVectorArrayMap eigen_tensor{weight_scale_tensor.data<double>(),
-                                     weight_scale_tensor.numel(), 1};
+                                     weight_scale_tensor.numel()};
     eigen_tensor *= static_cast<double>(S8_MAX);
     std::vector<float> scale_weights{weight_scale_tensor.data<double>(),

@@ -844,6 +869,84 @@ void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const {
   PrettyLogDetail("--- quantized %d fusion_gru ops", quantize_count);
 }
 
+void CPUQuantizePass::QuantizeMultiGru(Graph* graph) const {
+  GraphPatternDetector gpd;
+  patterns::MultiGru pattern{gpd.mutable_pattern(), name_scope_};
+  pattern();
+
+  int quantize_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "Quantize multi_gru op";
+    GET_IR_NODE_FROM_SUBGRAPH(gru, gru, pattern);
+
+    // skip if should not be quantized
+    if (!platform::HasOpINT8DataType(gru->Op())) {
+      LogQuantizationDisabled(gru);
+      return;
+    }
+
+    GET_IR_NODE_FROM_SUBGRAPH(x, x, pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(wx, wx, pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(h, h, pattern);
+
+    auto wx_names = gru->Op()->Input("WeightX");
+    if (!AreScalesPresentForNodes({x}) ||
+        !AreScalesPresentForVarNames(wx_names)) {
+      LogCannotQuantizeOp(gru);
+      return;
+    }
+
+    bool is_x_unsigned{false};
+    auto input_x_scale = GetScaleValueForNode(x, &is_x_unsigned);
+
+    double input_x_shift{128.};
+    if (is_x_unsigned) input_x_shift = 0.;
+
+    QuantizeInput(g, gru, x, "X", input_x_scale, is_x_unsigned, "Scale_data",
+                  input_x_shift, "Shift_data");
+
+    auto* scope = param_scope();
+    int wx_size = wx_names.size();
+    std::vector<std::string> w_scale_var_names;
+    for (int i = 0; i < wx_size; ++i) {
+      auto scale_tensor_src = GetScaleTensorByName(wx_names[i]);
+      EigenVectorArrayMap eigen_tensor_src{scale_tensor_src.data<double>(),
+                                           scale_tensor_src.numel()};
+
+      VarDesc scale_var_desc(patterns::PDNodeName("multi_gru", "w_scale"));
+      scale_var_desc.SetShape(framework::vectorize(scale_tensor_src.dims()));
+      scale_var_desc.SetDataType(proto::VarType::FP32);
+      scale_var_desc.SetLoDLevel(scale_tensor_src.lod().size());
+      scale_var_desc.SetPersistable(true);
+      auto* w_scale_node = g->CreateVarNode(&scale_var_desc);
+
+      auto* w_scale_tensor_dst =
+          scope->Var(w_scale_node->Name())->GetMutable<LoDTensor>();
+      w_scale_tensor_dst->Resize(scale_tensor_src.dims());
+      auto* dst_data =
+          w_scale_tensor_dst->mutable_data<float>(platform::CPUPlace());
+      EigenVectorArrayMapFloat eigen_tensor_dst{dst_data,
+                                                w_scale_tensor_dst->numel()};
+      eigen_tensor_dst =
+          eigen_tensor_src.cast<float>() * static_cast<float>(S8_MAX);
+      w_scale_var_names.push_back(w_scale_node->Name());
+      IR_NODE_LINK_TO(w_scale_node, gru);
+    }
+
+    gru->Op()->SetInput("Scale_weights", w_scale_var_names);
+
+    // return fp32 data
+    gru->Op()->SetAttr("force_fp32_output", true);
+
+    ++quantize_count;
+  };
+  gpd(graph, handler);
+  AddStatis(quantize_count);
+
+  PrettyLogDetail("--- quantized %d multi_gru ops", quantize_count);
+}
+
 void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Quantizing the graph.";
   PADDLE_ENFORCE_NOT_NULL(

@@ -864,6 +967,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   QuantizeMatmul(graph);
   QuantizeElementwiseAdd(graph);
   QuantizeFusionGru(graph);
+  QuantizeMultiGru(graph);
 }
 
 }  // namespace ir
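To make the arithmetic above concrete: QuantizeMultiGru shifts a signed input by 128 so it can be stored in an unsigned 8-bit buffer (input_x_shift is 0 for an already-unsigned input), and converts the double-precision per-channel weight scales into float multipliers of S8_MAX. A minimal NumPy sketch of the same math (helper names and the sample values are illustrative assumptions, not Paddle APIs):

import numpy as np

S8_MAX = 127.0

def quantize_input(x, scale, shift):
    # Mirrors the quantize op the pass inserts before multi_gru's "X":
    # a signed fp32 input is scaled and shifted by 128 to fit into u8.
    return np.round(x * scale + shift).astype(np.uint8)

def convert_weight_scales(w_scales_double):
    # Mirrors eigen_tensor_dst = eigen_tensor_src.cast<float>() * S8_MAX:
    # double per-channel scales become float s8 multipliers.
    src = np.asarray(w_scales_double, dtype=np.float64)
    return src.astype(np.float32) * np.float32(S8_MAX)

# Illustrative input in [-0.5, 0.5] so Scale_data = 2 * 127 keeps q in [1, 255].
x = np.random.uniform(-0.5, 0.5, size=(4, 8)).astype(np.float32)
qx = quantize_input(x, 2.0 * 127, 128.0)       # Scale_data, Shift_data
w_scales = convert_weight_scales([0.5, 0.25])  # made-up per-channel scales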
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h

@@ -18,6 +18,7 @@
 #include <string>
 #include <unordered_map>
 #include <utility>
+#include <vector>
 
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph.h"

@@ -58,6 +59,7 @@ class CPUQuantizePass : public FusePassBase {
   void QuantizeMatmul(Graph* graph) const;
   void QuantizeElementwiseAdd(Graph* graph) const;
   void QuantizeFusionGru(Graph* graph) const;
+  void QuantizeMultiGru(Graph* graph) const;
 
   void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
                      double scale_to_one, bool is_input_unsigned,

@@ -75,10 +77,14 @@ class CPUQuantizePass : public FusePassBase {
                         bool is_unsigned,
                         std::string scale_attr_name = "") const;
 
-  bool AreScalesPresentForNodes(const Node* op_node,
-                                std::initializer_list<Node*> nodes) const;
+  bool AreScalesPresentForVarNames(std::vector<std::string> names) const;
+  bool AreScalesPresentForNodes(std::initializer_list<Node*> nodes) const;
+
+  std::pair<bool, LoDTensor> GetScaleDataByName(const std::string& name) const;
   std::pair<bool, LoDTensor> GetScaleDataForNode(const Node* node) const;
+  LoDTensor GetScaleTensorByName(const std::string& name) const;
   LoDTensor GetScaleTensorForNode(const Node* node) const;
+  double GetScaleValueByName(const std::string& name,
+                             bool* is_unsigned = nullptr) const;
   double GetScaleValueForNode(const Node* node,
                               bool* is_unsigned = nullptr) const;
 
   bool IsOpDequantized(const Node* node) const;
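The new *ByName accessors exist because multi_gru references its weights by variable name: its "WeightX" input is a whole vector of names, so scale lookup can no longer go through a single graph node. A toy Python sketch of the registry behavior (the dict stands in for VarQuantScale; the entries are made-up):

# Toy registry mirroring VarQuantScale: var name -> (is_unsigned, scales).
quant_var_scales = {
    "x": (False, [0.25]),
    "wx0": (False, [0.5, 0.125]),
}

def scales_present_for_var_names(names):
    # Name-based counterpart of AreScalesPresentForNodes, added so the
    # pass can check every WeightX name of a multi_gru op.
    return all(name in quant_var_scales for name in names)

def get_scale_tensor_by_name(name):
    # Counterpart of GetScaleTensorByName: the tensor half of the pair.
    return quant_var_scales[name][1]

print(scales_present_for_var_names(["x", "wx0"]))  # True
print(scales_present_for_var_names(["x", "wx1"]))  # False: scale missing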
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc

@@ -112,7 +112,7 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
 }
 
 void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
-                 const std::initializer_list<std::string> variable_names,
+                 const std::vector<std::string> variable_names,
                  int* original_nodes_num, int* current_nodes_num,
                  std::string var_without_scale = "",
                  std::string var_signed = "") {

@@ -402,7 +402,7 @@ TEST(CpuQuantizePass, transpose) {
 static const std::initializer_list<std::string> variable_names_fusion_gru = {
     "x", "wx", "wh", "b", "h"};
 
-// x->Fusion_gru->h
+// (x, wx, wh, b)->Fusion_gru->h
 ProgramDesc BuildProgramDescFusionGru() {
   ProgramDesc prog;
   for (auto& v : variable_names_transpose) {

@@ -460,7 +460,7 @@ void MainTestFusionGru(const ProgramDesc& prog, int gru_count, int quant_count,
 }
 
 TEST(CpuQuantizePass, fusion_gru) {
-  // x->Fusion_gru->h
+  // (x, wx, wh, b)->Fusion_gru->h
   int gru_count = 1;
   int quant_count = 1;
   int dequant_count = 0;

@@ -470,6 +470,128 @@ TEST(CpuQuantizePass, fusion_gru) {
                     dequant_count, added_nodes_count, 2. * 127, 128.);
 }
 
+const std::vector<std::string> churn_out_vars(ProgramDesc* prog,
+                                              const std::string& prefix,
+                                              int number) {
+  auto v = std::vector<std::string>();
+  for (int i = 0; i < number; ++i) {
+    auto name = prefix + std::to_string(i);
+    prog->MutableBlock(0)->Var(name);
+    v.push_back(name);
+  }
+  return v;
+}
+
+void create_vars(ProgramDesc* prog,
+                 const std::initializer_list<std::string>& names) {
+  for (auto name : names) prog->MutableBlock(0)->Var(name);
+}
+
+void SetMultiGruOp(ProgramDesc* prog, const std::string x,
+                   const std::vector<std::string> wx,
+                   const std::vector<std::string> wh,
+                   const std::vector<std::string> b, const std::string h,
+                   int layers) {
+  auto* op = prog->MutableBlock(0)->AppendOp();
+  op->SetType("multi_gru");
+  op->SetInput("X", {x});
+  op->SetInput("WeightX", wx);
+  op->SetInput("WeightH", wh);
+  op->SetInput("Bias", b);
+  op->SetOutput("Hidden", {h});
+  op->SetAttr("layers", layers);
+  op->SetAttr("origin_mode", false);
+  op->SetAttr("use_mkldnn", true);
+  op->SetAttr("name", std::string("Multi_gru"));
+  op->SetAttr("mkldnn_data_type", std::string("int8"));
+  op->SetAttr("Scale_data", 1.0f);
+  op->SetAttr("Shift_data", 0.0f);
+}
+
+void MainTestMultiGru(int layers) {
+  ProgramDesc prog;
+
+  // Create variables
+  create_vars(&prog, {"x", "h"});
+  const std::vector<std::string> wx = churn_out_vars(&prog, "wx", 2 * layers);
+  const std::vector<std::string> wh = churn_out_vars(&prog, "wh", 2 * layers);
+  const std::vector<std::string> b = churn_out_vars(&prog, "b", 2 * layers);
+
+  std::vector<std::string> all_vars;
+  all_vars.reserve(wx.size() + wh.size() + b.size() + 2);
+  all_vars.insert(all_vars.end(), wx.begin(), wx.end());
+  all_vars.insert(all_vars.end(), wh.begin(), wh.end());
+  all_vars.insert(all_vars.end(), b.begin(), b.end());
+  all_vars.push_back("x");
+  all_vars.push_back("h");
+
+  // Prepare program descriptor
+  SetMultiGruOp(&prog, "x", wx, wh, b, "h", layers);
+
+  // Prepare and run the pass
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num, current_nodes_num;
+  PreparePass(&graph, prog, all_vars, &original_nodes_num,
+              &current_nodes_num);
+
+  // Verify graph after quantization
+  float scale = 2 * 127;
+  float shift = 128;
+  int quantize_nodes_count = 0;
+  int dequantize_nodes_count = 0;
+  int multi_gru_nodes_count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      auto* op = node->Op();
+      if (op->Type() == "multi_gru") {
+        multi_gru_nodes_count++;
+
+        auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name"));
+        EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_data")), scale)
+            << "Scale_data for node '" + op_name + "'.";
+        EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Shift_data")), shift)
+            << "Shift_data for node '" + op_name + "'.";
+        EXPECT_EQ(op->Input("Scale_weights").size(), 2u * layers)
+            << "Scale_weights for node '" + op_name + "'.";
+        EXPECT_EQ(BOOST_GET_CONST(bool, op->GetAttr("force_fp32_output")),
+                  true)
+            << "force_fp32_output for node '" + op_name + "'.";
+      } else if (op->Type() == "quantize") {
+        quantize_nodes_count++;
+      } else if (op->Type() == "dequantize") {
+        dequantize_nodes_count++;
+      }
+    }
+  }
+
+  int multi_gru_count = 1;
+  int quant_count = 1;
+  int quant_out_count = 1;
+  int dequant_count = 0;
+  int dequant_out_count = 0;
+  int scale_weights_count = 2 * layers;
+  int added_nodes_count = quant_count + quant_out_count + scale_weights_count +
+                          dequant_count + dequant_out_count;
+
+  EXPECT_EQ(multi_gru_nodes_count, multi_gru_count);
+  EXPECT_EQ(quantize_nodes_count, quant_count);
+  EXPECT_EQ(dequantize_nodes_count, dequant_count);
+  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
+}
+
+TEST(CpuQuantizePass, multi_gru_1) {
+  int layers = 1;
+  MainTestMultiGru(layers);
+}
+
+TEST(CpuQuantizePass, multi_gru_2) {
+  int layers = 2;
+  MainTestMultiGru(layers);
+}
+
+TEST(CpuQuantizePass, multi_gru_3) {
+  int layers = 3;
+  MainTestMultiGru(layers);
+}
+
 static const std::initializer_list<std::string> variable_names_reshape = {
     "a", "w1", "b", "c", "d", "e", "f"};
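The node counts asserted by MainTestMultiGru follow directly from the pass: quantizing one multi_gru adds one quantize op plus its output variable on X, one new Scale_weights variable per WeightX input (2 * layers of them), and no dequantize ops because force_fp32_output is set. A small Python sketch of that bookkeeping (a restatement of the added_nodes_count formula above):

def expected_added_nodes(layers):
    # One quantize op plus its output var on X, one Scale_weights var
    # per WeightX input (2 * layers), and no dequantize nodes since the
    # pass sets force_fp32_output = true on multi_gru.
    quant_count = 1
    quant_out_count = 1
    scale_weights_count = 2 * layers
    dequant_count = 0
    dequant_out_count = 0
    return (quant_count + quant_out_count + scale_weights_count
            + dequant_count + dequant_out_count)

for layers in (1, 2, 3):  # the three TEST cases above
    print(layers, expected_added_nodes(layers))  # -> 4, 6, 8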
python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py

@@ -66,7 +66,7 @@ class Quant2Int8MkldnnPass(object):
         self._fc_ops = ['fc']
         self._relu_ops = ['relu', 'relu6']
         self._matmul_ops = ['matmul']
-        self._gru_ops = ['fusion_gru']
+        self._gru_ops = ['fusion_gru', 'multi_gru']
         self._weight_scales = {}
         # Collect the Input and Output sclaes from Fake quant models
         self._var_quant_scales = {}

@@ -352,6 +352,8 @@ class Quant2Int8MkldnnPass(object):
         graph = self._apply_pass(graph, 'mul_lstm_fuse_pass')
         graph = self._apply_pass(graph, 'fc_gru_fuse_pass')
         graph = self._apply_pass(graph, 'mul_gru_fuse_pass')
+        graph = self._apply_pass(graph, 'multi_gru_fuse_pass')
+        graph = self._apply_pass(graph, 'multi_gru_seq_fuse_pass')
         graph = self._apply_pass(graph, 'seq_concat_fc_fuse_pass')
         graph = self._apply_pass(graph, 'squared_mat_sub_fuse_pass')
         graph = self._apply_pass(graph, 'is_test_pass')

@@ -450,38 +452,46 @@ class Quant2Int8MkldnnPass(object):
                 self._var_quant_scales[weight_var_name] = (use_unsigned_int,
                                                            lod_tensor)
 
+        def _compute_single_gru_weight_scales(wx_var_name, wh_var_name):
+            wx = np.array(self._load_param(self._scope, wx_var_name))
+            wh = np.array(self._load_param(self._scope, wh_var_name))
+            OC = wh.shape[0]
+            scale_ur = 1.0 / np.max(np.abs(
+                np.concatenate(
+                    [wx[:, :2 * OC],
+                     wh.flatten()[:2 * OC * OC].reshape(OC, 2 * OC)],
+                    axis=0)),
+                axis=0)
+            scale_o = 1.0 / np.max(np.abs(
+                np.concatenate(
+                    [wx[:, 2 * OC:],
+                     wh.flatten()[2 * OC * OC:].reshape(OC, OC)],
+                    axis=0)),
+                axis=0)
+            gru_weights_scale = np.concatenate(
+                [scale_ur, scale_o]).astype('float')
+            return self._convert_scale2tensor(gru_weights_scale)
+
         def _compute_gru_weight_scales(wx_name, wh_name):
             for op in graph.all_op_nodes():
                 if op.op().type() in self._gru_ops:
-                    wx_var_name = op.input(wx_name)[0]
-                    wh_var_name = op.input(wh_name)[0]
-                    wx = np.array(self._load_param(self._scope, wx_var_name))
-                    wh = np.array(self._load_param(self._scope, wh_var_name))
-                    OC = wh.shape[0]
-                    scale_ur = 1.0 / np.max(np.abs(
-                        np.concatenate(
-                            [wx[:, :2 * OC],
-                             wh.flatten()[:2 * OC * OC].reshape(OC, 2 * OC)],
-                            axis=0)),
-                        axis=0)
-                    scale_o = 1.0 / np.max(np.abs(
-                        np.concatenate(
-                            [wx[:, 2 * OC:],
-                             wh.flatten()[2 * OC * OC:].reshape(OC, OC)],
-                            axis=0)),
-                        axis=0)
-                    gru_weights_scale = np.concatenate(
-                        [scale_ur, scale_o]).astype('float')
-                    lod_tensor = self._convert_scale2tensor(gru_weights_scale)
-                    use_unsigned_int = False
-                    self._var_quant_scales[wx_var_name] = (use_unsigned_int,
-                                                           lod_tensor)
+                    assert len(op.input(wx_name)) == len(
+                        op.input(wh_name)
+                    ), 'Mismatch in number of weights inputs ({} for WeightX vs. {} for WeightH).'.format(
+                        len(op.input(wx_name)), len(op.input(wh_name)))
+                    for i, wx_var_name in enumerate(op.input(wx_name)):
+                        wh_var_name = op.input(wh_name)[i]
+                        use_unsigned_int = False
+                        lod_tensor = _compute_single_gru_weight_scales(
+                            wx_var_name, wh_var_name)
+                        self._var_quant_scales[wx_var_name] = (use_unsigned_int,
+                                                               lod_tensor)
 
         _compute_var_scales(self._conv_ops, "Filter", axis=1)
         _compute_var_scales(self._fc_ops, "W", axis=0)
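The per-channel scale math in _compute_single_gru_weight_scales is easier to read standalone: wx holds the update/reset gate columns first and the output gate columns last, wh packs an (OC, 2*OC) update/reset block followed by an (OC, OC) output block, and each channel's scale is the reciprocal of its maximum absolute weight. A NumPy sketch under assumed shapes (wx: (IC, 3*OC), wh: (OC, 3*OC); the sizes are illustrative, and the real helper loads its parameters from the scope):

import numpy as np

def single_gru_weight_scales(wx, wh):
    # Standalone restatement of _compute_single_gru_weight_scales.
    OC = wh.shape[0]
    wh_ur = wh.flatten()[:2 * OC * OC].reshape(OC, 2 * OC)  # update/reset
    wh_o = wh.flatten()[2 * OC * OC:].reshape(OC, OC)       # output gate
    scale_ur = 1.0 / np.max(
        np.abs(np.concatenate([wx[:, :2 * OC], wh_ur], axis=0)), axis=0)
    scale_o = 1.0 / np.max(
        np.abs(np.concatenate([wx[:, 2 * OC:], wh_o], axis=0)), axis=0)
    return np.concatenate([scale_ur, scale_o]).astype('float')

IC, OC = 6, 4  # illustrative sizes
wx = np.random.randn(IC, 3 * OC)
wh = np.random.randn(OC, 3 * OC)
print(single_gru_weight_scales(wx, wh).shape)  # (3 * OC,) per-channel scales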
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt

@@ -239,7 +239,7 @@ if(LINUX AND WITH_MKLDNN)
     set(QUANT2_GRU_MODEL_ARCHIVE "GRU_quant_acc.tar.gz")
     set(QUANT2_GRU_MODEL_DIR "${QUANT_INSTALL_DIR}/GRU_quant2")
     download_quant_model(${QUANT2_GRU_MODEL_DIR} ${QUANT2_GRU_MODEL_ARCHIVE})
-    set(QUANT2_GRU_OPS_TO_QUANTIZE "fusion_gru")
+    set(QUANT2_GRU_OPS_TO_QUANTIZE "multi_gru")
 
     ### Save FP32 model or INT8 model from Quant model