PaddlePaddle/Paddle commit 966447e3 (unverified)
Author: Wojciech Uss
Committed via GitHub on Oct 01, 2020
Parent: 0cd4907e

Added support for quantization of fusion_gru (#27518)

The change registers fusion_gru as a quantizable operator: it adds a FusionGru pattern to the graph pattern detector, a QuantizeFusionGru step to the CPU quantization pass, Shift support to the quantize and dequantize-quantize squash logic, GRU weight-scale computation to the Quant2 INT8 MKL-DNN pass, and matching unit and integration tests; the pinned oneDNN revision (MKLDNN_TAG) is also updated.

Showing 11 changed files with 268 additions and 47 deletions (+268 −47).
Changed files (11):

  cmake/external/mkldnn.cmake                                                   +1  −1
  paddle/fluid/framework/ir/graph_pattern_detector.cc                           +20 −3
  paddle/fluid/framework/ir/graph_pattern_detector.h                            +15 −0
  paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc                         +71 −8
  paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h                          +7  −12
  paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc                  +81 −0
  paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc                  +8  −2
  paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc                   +2  −2
  python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py      +36 −0
  python/paddle/fluid/contrib/slim/tests/CMakeLists.txt                         +15 −11
  python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py  +12 −8
cmake/external/mkldnn.cmake

@@ -20,7 +20,7 @@ SET(MKLDNN_SOURCE_DIR ${THIRD_PARTY_PATH}/mkldnn/src/extern_mkldnn)
 SET(MKLDNN_INSTALL_DIR ${THIRD_PARTY_PATH}/install/mkldnn)
 SET(MKLDNN_INC_DIR "${MKLDNN_INSTALL_DIR}/include" CACHE PATH "mkldnn include directory." FORCE)
 SET(MKLDNN_REPOSITORY https://github.com/oneapi-src/oneDNN.git)
-SET(MKLDNN_TAG 64a48f9565aa72f6359917b3406328075a409939)
+SET(MKLDNN_TAG 361725600224f41b7347a1c6bee9b04d1e6c14d7)
 # Introduce variables:
 # * CMAKE_INSTALL_LIBDIR
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -1882,9 +1882,9 @@ PDNode *patterns::MultipleQuantize::operator()() {
 PDNode *patterns::QuantizePlacement::operator()(
     const std::unordered_set<std::string> &quantize_enabled_op_types) {
   std::unordered_set<std::string> supported_op_types =
-      std::unordered_set<std::string>({"concat", "conv2d", "elementwise_add",
-                                       "fc", "matmul", "pool2d", "prior_box",
-                                       "relu", "reshape2", "transpose2"});
+      std::unordered_set<std::string>(
+          {"concat", "conv2d", "elementwise_add", "fc", "matmul", "pool2d",
+           "prior_box", "relu", "reshape2", "transpose2", "fusion_gru"});
   if (!quantize_enabled_op_types.empty()) {
     supported_op_types = quantize_enabled_op_types;
   }

@@ -2280,6 +2280,23 @@ PDNode *patterns::MatmulTransposeReshapePattern::operator()() {
   return reshape_out;
 }

+PDNode *patterns::FusionGru::operator()() {
+  auto op = pattern->NewNode(op_repr())->assert_is_op("fusion_gru");
+  auto x = pattern->NewNode(x_repr())->AsInput()->assert_is_op_input(
+      "fusion_gru", "X");
+  auto weight_h = pattern->NewNode(weight_h_repr())
+                      ->AsInput()
+                      ->assert_is_op_input("fusion_gru", "WeightH");
+  auto weight_x = pattern->NewNode(weight_x_repr())
+                      ->AsInput()
+                      ->assert_is_op_input("fusion_gru", "WeightX");
+  auto out = pattern->NewNode(out_repr())
+                 ->AsOutput()
+                 ->assert_is_op_output("fusion_gru", "Hidden");
+  op->LinksFrom({x, weight_h, weight_x}).LinksTo({out});
+  return out;
+}
+
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h

@@ -1312,6 +1312,21 @@ struct MatmulTransposeReshapePattern : public PatternBase {
   PATTERN_DECL_NODE(reshape_out_xshape);
 };

+// fusion_gru op
+// Forward pass for fusion_gru.
+// fusion_gru out is a result of the operator.
+struct FusionGru : public PatternBase {
+  FusionGru(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "fusion_gru") {}
+
+  PDNode* operator()();
+  PATTERN_DECL_NODE(op);
+  PATTERN_DECL_NODE(x);
+  PATTERN_DECL_NODE(weight_h);
+  PATTERN_DECL_NODE(weight_x);
+  PATTERN_DECL_NODE(out);
+};
+
 }  // namespace patterns

 // Link two ir::Nodes from each other.
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc

@@ -63,8 +63,9 @@ enum { U8_MAX = 255, S8_MAX = 127 };

 void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
                                     std::string input_name, double scale_to_one,
-                                    bool is_unsigned,
-                                    std::string scale_attr_name) const {
+                                    bool is_input_unsigned,
+                                    std::string scale_attr_name, float shift,
+                                    std::string shift_attr_name) const {
   auto inputs = op->Op()->InputNames();
   bool name_found =
       std::find(inputs.begin(), inputs.end(), input_name) != inputs.end();
@@ -72,7 +73,7 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
       platform::errors::InvalidArgument(
           "Var(%s) isn't the input of the %s operator.", input_name,
           op->Op()->Type()));
-  unsigned max = is_unsigned ? U8_MAX : S8_MAX;
+  unsigned max = is_input_unsigned ? U8_MAX : S8_MAX;
   float scale = scale_to_one * max;

   // Create quantize output variable
@@ -86,7 +87,8 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
   q_desc.SetOutput("Output",
                    std::vector<std::string>({quantize_out_node->Name()}));
   q_desc.SetAttr("Scale", scale);
-  q_desc.SetAttr("is_negative_input", !is_unsigned);
+  q_desc.SetAttr("Shift", shift);
+  q_desc.SetAttr("is_negative_input", !is_input_unsigned);

   q_desc.SetAttr("output_format",
                  Has("data_layout") ? Get<std::string>("data_layout") : "NHWC");
@@ -103,11 +105,13 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
   IR_NODE_LINK_TO(quantize_out_node, op);

   if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
+  if (!shift_attr_name.empty()) op->Op()->SetAttr(shift_attr_name, shift);
 }

 void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
-                                     bool are_unsigned,
-                                     std::string scale_attr_name) const {
+                                     bool are_inputs_unsigned,
+                                     std::string scale_attr_name, float shift,
+                                     std::string shift_attr_name) const {
   auto inputs = op->inputs;
   auto output = op->outputs[0];
   PADDLE_ENFORCE_GE(inputs.size(), 1,
@@ -127,7 +131,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
   std::vector<std::string> quantize_out_node_names(inputs.size());

   double scale_out = GetScaleValueForNode(output);
-  unsigned max = are_unsigned ? U8_MAX : S8_MAX;
+  unsigned max = are_inputs_unsigned ? U8_MAX : S8_MAX;
   float scale = scale_out * max;

   for (size_t i = 0; i < inputs.size(); i++) {
@@ -137,10 +141,11 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
     quantize_out_node_names[i] = quantize_out_nodes[i]->Name();

     q_desc.SetAttr("Scale", scale);
+    q_desc.SetAttr("Shift", shift);
     q_desc.SetInput("Input", std::vector<std::string>({inputs[i]->Name()}));
     q_desc.SetOutput("Output",
                      std::vector<std::string>({quantize_out_node_names[i]}));
-    q_desc.SetAttr("is_negative_input", !are_unsigned);
+    q_desc.SetAttr("is_negative_input", !are_inputs_unsigned);
     auto quantize_op = g->CreateOpNode(&q_desc);  // OpDesc will be copied.

     // link quantize op
@@ -154,6 +159,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
   op->Op()->SetInput(input_name, quantize_out_node_names);

   if (!scale_attr_name.empty()) op->Op()->SetAttr(scale_attr_name, scale);
+  if (!shift_attr_name.empty()) op->Op()->SetAttr(shift_attr_name, shift);
 }

 void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
@@ -782,6 +788,62 @@ void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
                   quantize_elementwise_add_count);
 }

+void CPUQuantizePass::QuantizeFusionGru(Graph* graph) const {
+  GraphPatternDetector gpd;
+  patterns::FusionGru pattern{gpd.mutable_pattern(), name_scope_};
+  pattern();
+
+  int quantize_count = 0;
+  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
+                     Graph* g) {
+    VLOG(4) << "Quantize fusion_gru op";
+    GET_IR_NODE_FROM_SUBGRAPH(op, op, pattern);
+
+    // skip if should not be quantized
+    if (!platform::HasOpINT8DataType(op->Op())) {
+      LogQuantizationDisabled(op);
+      return;
+    }
+
+    GET_IR_NODE_FROM_SUBGRAPH(x, x, pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(weight_h, weight_h, pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(weight_x, weight_x, pattern);
+    GET_IR_NODE_FROM_SUBGRAPH(out, out, pattern);
+
+    if (!AreScalesPresentForNodes(op, {x, weight_h, weight_x})) {
+      LogCannotQuantizeOp(op);
+      return;
+    }
+
+    bool is_x_unsigned{false};
+    auto input_x_scale = GetScaleValueForNode(x, &is_x_unsigned);
+
+    double input_x_shift{128.};
+    if (is_x_unsigned) input_x_shift = 0.;
+
+    QuantizeInput(g, op, x, "X", input_x_scale, is_x_unsigned, "Scale_data",
+                  input_x_shift, "Shift_data");
+
+    auto weight_scale_tensor = GetScaleTensorForNode(weight_x);
+    EigenVectorArrayMap eigen_tensor{weight_scale_tensor.data<double>(),
+                                     weight_scale_tensor.numel(), 1};
+    eigen_tensor *= static_cast<double>(S8_MAX);
+    std::vector<float> scale_weights{
+        weight_scale_tensor.data<double>(),
+        weight_scale_tensor.data<double>() + weight_scale_tensor.numel()};
+
+    op->Op()->SetAttr("Scale_weights", scale_weights);
+    // return fp32 data
+    op->Op()->SetAttr("force_fp32_output", true);
+
+    ++quantize_count;
+  };
+  gpd(graph, handler);
+  AddStatis(quantize_count);
+
+  PrettyLogDetail("--- quantized %d fusion_gru ops", quantize_count);
+}
+
 void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   VLOG(3) << "Quantizing the graph.";
   PADDLE_ENFORCE_NOT_NULL(
@@ -801,6 +863,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
   QuantizeReshape(graph);
   QuantizeMatmul(graph);
   QuantizeElementwiseAdd(graph);
+  QuantizeFusionGru(graph);
 }

 }  // namespace ir
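Note: the new shift arguments exist because fusion_gru's X input may be signed fp32 data feeding a kernel that expects uint8. The pass asks for shift 128 for signed input (and 0 for unsigned input), which relocates the signed quantized range into uint8. A minimal numpy sketch of that mapping follows; quantize_input and its parameter names are illustrative stand-ins, not the pass's actual API:

    import numpy as np

    S8_MAX, U8_MAX = 127, 255

    def quantize_input(x_f32, scale_to_one, is_unsigned):
        # Mirrors the pass's logic: scale = scale_to_one * max,
        # shift = 0 for unsigned data and 128 for signed data.
        max_val = U8_MAX if is_unsigned else S8_MAX
        scale = scale_to_one * max_val
        shift = 0.0 if is_unsigned else 128.0
        return np.rint(x_f32 * scale + shift).astype(np.uint8)

    x = np.array([-1.0, -0.5, 0.0, 0.5, 1.0], dtype=np.float32)
    print(quantize_input(x, scale_to_one=1.0, is_unsigned=False))
    # [  1  64 128 192 255] -- the signed range shifted into uint8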
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h

@@ -49,31 +49,26 @@ class CPUQuantizePass : public FusePassBase {
   void ApplyImpl(ir::Graph* graph) const override;

   void QuantizeConv(Graph* graph, bool with_residual_data = false) const;
-
   void QuantizeFc(Graph* graph) const;
-
   void QuantizePool(Graph* graph) const;
-
   void QuantizeConcat(Graph* graph) const;
-
   void QuantizePriorBox(Graph* graph) const;
-
   void QuantizeTranspose(Graph* graph) const;
-
   void QuantizeReshape(Graph* graph) const;
-
   void QuantizeMatmul(Graph* graph) const;
-
   void QuantizeElementwiseAdd(Graph* graph) const;
+  void QuantizeFusionGru(Graph* graph) const;

   void QuantizeInput(Graph* g, Node* op, Node* input, std::string input_name,
-                     double scale_to_one, bool is_unsigned,
-                     std::string scale_attr_name = "") const;
+                     double scale_to_one, bool is_input_unsigned,
+                     std::string scale_attr_name = "", float shift = 0.0,
+                     std::string shift_attr_name = "") const;

   // quantize all inputs of given name with the same (minimum) scale
   void QuantizeInputs(Graph* g, Node* op, std::string input_name,
-                      bool are_unsigned,
-                      std::string scale_attr_name = "") const;
+                      bool are_inputs_unsigned,
+                      std::string scale_attr_name = "", float shift = 0.0,
+                      std::string shift_attr_name = "") const;

   void DequantizeOutput(Graph* g, Node* op, Node* output,
                         std::string output_name, double scale_to_one,
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc

@@ -91,6 +91,16 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
     op->SetAttr("Scale_x", 1.0f);
     op->SetAttr("Scale_y", 1.0f);
     op->SetAttr("Scale_out", 1.0f);
+  } else if (type == "fusion_gru") {
+    op->SetInput("X", {inputs[0]});
+    op->SetInput("Bias", {inputs[1]});
+    op->SetInput("WeightX", {inputs[2]});
+    op->SetInput("WeightH", {inputs[3]});
+    op->SetOutput("Hidden", {outputs[0]});
+    op->SetAttr("mkldnn_data_type", mkldnn_data_type);
+    op->SetAttr("Scale_data", 1.0f);
+    op->SetAttr("Shift_data", 0.0f);
+    op->SetAttr("Weight_scale", std::vector<float>{1.0f});
   }
 }

@@ -389,6 +399,77 @@ TEST(CpuQuantizePass, transpose) {
            quant_count, dequant_count, added_nodes_count, 2.0f * 127);
 }

+static const std::initializer_list<std::string> variable_names_fusion_gru = {
+    "x", "wx", "wh", "b", "h"};
+
+// x->Fusion_gru->h
+ProgramDesc BuildProgramDescFusionGru() {
+  ProgramDesc prog;
+  for (auto& v : variable_names_transpose) {
+    auto* var = prog.MutableBlock(0)->Var(v);
+    if (v.find("wx") == 0 || v.find("wh") || v.find("b")) {
+      var->SetPersistable(true);
+    }
+  }
+
+  SetOp(&prog, "fusion_gru", "Fusion_gru", {"x", "wx", "wh", "b"}, {"h"}, true,
+        "int8");
+
+  return prog;
+}
+
+void MainTestFusionGru(const ProgramDesc& prog, int gru_count, int quant_count,
+                       int dequant_count, int added_nodes_count, float scale,
+                       float shift) {
+  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
+  int original_nodes_num, current_nodes_num;
+  PreparePass(&graph, prog, variable_names_fusion_gru, &original_nodes_num,
+              &current_nodes_num);
+
+  int quantize_nodes_count = 0;
+  int dequantize_nodes_count = 0;
+  int gru_nodes_count = 0;
+  for (auto* node : graph->Nodes()) {
+    if (node->IsOp()) {
+      auto* op = node->Op();
+      if (op->Type() == "fusion_gru") {
+        gru_nodes_count++;
+
+        auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name"));
+        EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_data")), scale)
+            << "Scale_data for node '" + op_name + "'.";
+        EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Shift_data")), shift)
+            << "Shift_data for node '" + op_name + "'.";
+        EXPECT_EQ(BOOST_GET_CONST(std::vector<float>,
+                                  op->GetAttr("Scale_weights"))[0],
+                  scale)
+            << "Scale_weights for node '" + op_name + "'.";
+        EXPECT_EQ(BOOST_GET_CONST(bool, op->GetAttr("force_fp32_output")), true)
+            << "force_fp32_output for node '" + op_name + "'.";
+      } else if (op->Type() == "quantize") {
+        quantize_nodes_count++;
+      } else if (op->Type() == "dequantize") {
+        dequantize_nodes_count++;
+      }
+    }
+  }
+  EXPECT_EQ(gru_nodes_count, gru_count);
+  EXPECT_EQ(quantize_nodes_count, quant_count);
+  EXPECT_EQ(dequantize_nodes_count, dequant_count);
+  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
+}
+
+TEST(CpuQuantizePass, fusion_gru) {
+  // x->Fusion_gru->h
+  int gru_count = 1;
+  int quant_count = 1;
+  int dequant_count = 0;
+  // 1 Quant + 1 IN + 0 DeQuant + 0 OUT
+  int added_nodes_count = 1 + 1 + 0 + 0;
+  MainTestFusionGru(BuildProgramDescFusionGru(), gru_count, quant_count,
+                    dequant_count, added_nodes_count, 2. * 127, 128.);
+}
+
 static const std::initializer_list<std::string> variable_names_reshape = {
     "a", "w1", "b", "c", "d", "e", "f"};
paddle/fluid/framework/ir/mkldnn/cpu_quantize_squash_pass.cc

@@ -76,6 +76,8 @@ void CPUQuantizeSquashPass::DequantQuantSquash(
       BOOST_GET_CONST(float, dequant_op->Op()->GetAttr("Scale"));
   float quant_scale =
       BOOST_GET_CONST(float, quant_op->Op()->GetAttr("Scale"));
+  float dequant_shift = dequant_op->Op()->GetAttrIfExists<float>("Shift");
+  float quant_shift = quant_op->Op()->GetAttrIfExists<float>("Shift");
   PADDLE_ENFORCE_NE(
       nodes_keep_counter->find(dequant_out), nodes_keep_counter->end(),
       platform::errors::NotFound("The dequant output node is not found."));
@@ -83,7 +85,7 @@ void CPUQuantizeSquashPass::DequantQuantSquash(
   // check if dequantize op should be kept or removed, decrease the counter
   bool keep_dequant = (*nodes_keep_counter)[dequant_out]-- > 1;

-  if (dequant_scale == quant_scale) {
+  if (dequant_scale == quant_scale && dequant_shift == quant_shift) {
     // squash dequantize-quantize to nothing
     auto quant_out_var_name = quant_out->Name();
     auto next_op_inputs = next_op_desc->InputNames();
@@ -110,7 +112,9 @@ void CPUQuantizeSquashPass::DequantQuantSquash(
     desc.SetInput("Input", std::vector<std::string>({dequant_in->Name()}));
     desc.SetOutput("Output", std::vector<std::string>({quant_out->Name()}));
     desc.SetAttr("Scale_in", dequant_scale);
+    desc.SetAttr("Shift_in", dequant_shift);
     desc.SetAttr("Scale_out", quant_scale);
+    desc.SetAttr("Shift_out", quant_shift);

     auto requant_op = g->CreateOpNode(&desc);
@@ -293,6 +297,7 @@ void CPUQuantizeSquashPass::MultipleQuantizeSquash(Graph* graph) const {
         }));
     auto* first_quant_out = first_quant_op->outputs[0];
     float scale = first_quant_op->Op()->GetAttrIfExists<float>("Scale");
+    float shift = first_quant_op->Op()->GetAttrIfExists<float>("Shift");

     PADDLE_ENFORCE_NE(scale, 0,
                       platform::errors::InvalidArgument(
@@ -302,7 +307,8 @@ void CPUQuantizeSquashPass::MultipleQuantizeSquash(Graph* graph) const {
       auto quant_op = prev_out->outputs[iter];
       if (quant_op->IsOp() && quant_op->Op()->Type() == "quantize" &&
          quant_op->id() != first_quant_op->id() &&
-          quant_op->Op()->GetAttrIfExists<float>("Scale") == scale) {
+          quant_op->Op()->GetAttrIfExists<float>("Scale") == scale &&
+          quant_op->Op()->GetAttrIfExists<float>("Shift") == shift) {
         auto quant_out = quant_op->outputs[0];
         auto last_op = quant_out->outputs[0];
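Note: with shifts in play, the squash pass may collapse a dequantize/quantize pair to nothing only when both Scale and Shift agree; otherwise the pair is fused into a requantize op carrying Shift_in/Shift_out. Why matching attributes make the pair cancel (up to rounding) can be checked numerically, assuming the q = rint(x * scale + shift) convention used in this commit's tests:

    import numpy as np

    scale, shift = 2.0 * 127, 128.0
    x = np.linspace(-0.5, 0.5, 5).astype(np.float32)
    q = np.rint(x * scale + shift).astype(np.uint8)   # quantize with shift
    x_back = (q.astype(np.float32) - shift) / scale   # dequantize, same attrs
    print(np.max(np.abs(x - x_back)))  # ~0.002, within half a quantization step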
paddle/fluid/operators/fused/mkldnn/fusion_gru_mkldnn_op.cc

@@ -95,7 +95,7 @@ class GRUMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::gru_forward> {
       // Create memory descriptors
       auto input_md = MKLDNNMemDesc({Ti, N, IC}, MKLDNNGetDataType<T>(),
-                                    MKLDNNMemoryFormat::any);
+                                    MKLDNNMemoryFormat::ntc);
       auto weight_x_md =
           MKLDNNMemDesc({L, D, IC, G, OC}, weights_dt, MKLDNNMemoryFormat::any);
       auto weight_h_md =
@@ -103,7 +103,7 @@ class GRUMKLDNNHandler : public platform::MKLDNNHandlerT<T, dnnl::gru_forward> {
       auto bias_md = MKLDNNMemDesc({L, D, G, OC}, MKLDNNGetDataType<float>(),
                                    MKLDNNMemoryFormat::ldgo);
       auto hidden_md = MKLDNNMemDesc({Ti, N, OC}, MKLDNNGetDataType<T_out>(),
-                                     MKLDNNMemoryFormat::any);
+                                     MKLDNNMemoryFormat::ntc);

       auto h0_md = MKLDNNMemDesc({L, D, N, OC}, MKLDNNGetDataType<T>(),
                                  MKLDNNMemoryFormat::ldnc);
python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py

@@ -66,6 +66,7 @@ class Quant2Int8MkldnnPass(object):
         self._fc_ops = ['fc']
         self._relu_ops = ['relu', 'relu6']
         self._matmul_ops = ['matmul']
+        self._gru_ops = ['fusion_gru']
         self._weight_scales = {}
         # Collect the Input and Output sclaes from Fake quant models
         self._var_quant_scales = {}
@@ -449,8 +450,43 @@ class Quant2Int8MkldnnPass(object):
             self._var_quant_scales[weight_var_name] = (use_unsigned_int,
                                                        lod_tensor)

+        def _compute_gru_weight_scales(wx_name, wh_name):
+            for op in graph.all_op_nodes():
+                if op.op().type() in self._gru_ops:
+                    wx_var_name = op.input(wx_name)[0]
+                    wh_var_name = op.input(wh_name)[0]
+                    wx = np.array(self._load_param(self._scope, wx_var_name))
+                    wh = np.array(self._load_param(self._scope, wh_var_name))
+                    OC = wh.shape[0]
+                    scale_ur = 1.0 / np.max(np.abs(
+                        np.concatenate(
+                            [
+                                wx[:, :2 * OC], wh.flatten()[:2 * OC * OC]
+                                .reshape(OC, 2 * OC)
+                            ],
+                            axis=0)),
+                                            axis=0)
+                    scale_o = 1.0 / np.max(np.abs(
+                        np.concatenate(
+                            [
+                                wx[:, 2 * OC:], wh.flatten()[2 * OC * OC:]
+                                .reshape(OC, OC)
+                            ],
+                            axis=0)),
+                                           axis=0)
+
+                    gru_weights_scale = np.concatenate(
+                        [scale_ur, scale_o]).astype('float')
+
+                    lod_tensor = self._convert_scale2tensor(gru_weights_scale)
+                    use_unsigned_int = False
+                    self._var_quant_scales[wx_var_name] = (use_unsigned_int,
+                                                           lod_tensor)
+
         _compute_var_scales(self._conv_ops, "Filter", axis=1)
         _compute_var_scales(self._fc_ops, "W", axis=0)
+        _compute_var_scales(self._gru_ops, "WeightH", axis=0)
+        _compute_gru_weight_scales("WeightX", "WeightH")
         return graph

     def _find_avg_pooling_ids(self, graph):
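Note: per the test file's comment further below ("Scales shape in oneDNN: [3, OC]"), oneDNN expects one weight scale per output channel for each of the three GRU gates. _compute_gru_weight_scales therefore reduces the update/reset-gate columns of WeightX and WeightH together, and the output-gate columns separately. A toy run of the same arithmetic with illustrative sizes (IC = 4, OC = 3; all names local to this sketch):

    import numpy as np

    IC, OC = 4, 3
    wx = np.random.rand(IC, 3 * OC) * 2 - 1   # WeightX: [IC, 3 * OC]
    wh = np.random.rand(3 * OC * OC) * 2 - 1  # WeightH: [OC, 2*OC] and [OC, OC] blocks, flattened

    scale_ur = 1.0 / np.max(np.abs(np.concatenate(
        [wx[:, :2 * OC], wh[:2 * OC * OC].reshape(OC, 2 * OC)], axis=0)),
                            axis=0)
    scale_o = 1.0 / np.max(np.abs(np.concatenate(
        [wx[:, 2 * OC:], wh[2 * OC * OC:].reshape(OC, OC)], axis=0)),
                           axis=0)

    print(scale_ur.shape, scale_o.shape)  # (6,) (3,) -> 3 * OC scales in total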
python/paddle/fluid/contrib/slim/tests/CMakeLists.txt

@@ -98,18 +98,16 @@ function(download_quant_model install_dir data_file)
   endif()
 endfunction()

-function(save_quant_ic_model_test target quant_model_dir fp32_model_save_path int8_model_save_path)
+function(save_quant_ic_model_test target quant_model_dir int8_model_save_path)
   py_test(${target} SRCS ${CMAKE_CURRENT_SOURCE_DIR}/save_quant_model.py
           ARGS --quant_model_path ${quant_model_dir}
-          --fp32_model_save_path ${fp32_model_save_path}
           --int8_model_save_path ${int8_model_save_path}
           --debug)
 endfunction()

-function(save_quant_nlp_model_test target quant_model_dir fp32_model_save_path int8_model_save_path ops_to_quantize)
+function(save_quant_nlp_model_test target quant_model_dir int8_model_save_path ops_to_quantize)
   py_test(${target} SRCS ${CMAKE_CURRENT_SOURCE_DIR}/save_quant_model.py
           ARGS --quant_model_path ${quant_model_dir}
-          --fp32_model_save_path ${fp32_model_save_path}
           --int8_model_save_path ${int8_model_save_path}
           --ops_to_quantize ${ops_to_quantize})
 endfunction()
@@ -227,8 +225,6 @@ if(LINUX AND WITH_MKLDNN)
     set(NLP_LABLES_PATH "${NLP_DATA_DIR}/Ernie_dataset/label.xnli.dev")
     download_quant_data(${NLP_DATA_DIR} ${NLP_DATA_ARCHIVE})

-    set(QUANT2_NLP_OPS_TO_QUANTIZE "fc,reshape2,transpose2,matmul,elementwise_add")
-
     # Quant2 Ernie
     set(QUANT2_ERNIE_MODEL_ARCHIVE "ernie_qat.tar.gz")
     set(QUANT2_ERNIE_MODEL_DIR "${QUANT_INSTALL_DIR}/Ernie_quant2")
@@ -236,17 +232,25 @@ if(LINUX AND WITH_MKLDNN)
     set(FP32_ERNIE_MODEL_ARCHIVE "ernie_fp32_model.tar.gz")
     set(FP32_ERNIE_MODEL_DIR "${QUANT_INSTALL_DIR}/Ernie_float")
     download_quant_fp32_model(${FP32_ERNIE_MODEL_DIR} ${FP32_ERNIE_MODEL_ARCHIVE})
-    inference_quant2_int8_nlp_test(test_quant2_int8_ernie_mkldnn ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float ${FP32_ERNIE_MODEL_DIR}/ernie_fp32_model ${NLP_DATA_PATH} ${NLP_LABLES_PATH} ${QUANT2_NLP_OPS_TO_QUANTIZE})
+    set(QUANT2_ERNIE_OPS_TO_QUANTIZE "fc,reshape2,transpose2,matmul,elementwise_add")
+    inference_quant2_int8_nlp_test(test_quant2_int8_ernie_mkldnn ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float ${FP32_ERNIE_MODEL_DIR}/ernie_fp32_model ${NLP_DATA_PATH} ${NLP_LABLES_PATH} ${QUANT2_ERNIE_OPS_TO_QUANTIZE})
+
+    # Quant2 GRU
+    set(QUANT2_GRU_MODEL_ARCHIVE "GRU_quant_acc.tar.gz")
+    set(QUANT2_GRU_MODEL_DIR "${QUANT_INSTALL_DIR}/GRU_quant2")
+    download_quant_model(${QUANT2_GRU_MODEL_DIR} ${QUANT2_GRU_MODEL_ARCHIVE})
+    set(QUANT2_GRU_OPS_TO_QUANTIZE "fusion_gru")

     ### Save FP32 model or INT8 model from Quant model
     set(QUANT2_INT8_RESNET50_SAVE_PATH "${QUANT_INSTALL_DIR}/ResNet50_quant2_int8")
-    set(QUANT2_FP32_RESNET50_SAVE_PATH "${QUANT_INSTALL_DIR}/ResNet50_quant2_fp32")
-    save_quant_ic_model_test(save_quant2_model_resnet50 ${QUANT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float ${QUANT2_FP32_RESNET50_SAVE_PATH} ${QUANT2_INT8_RESNET50_SAVE_PATH})
+    save_quant_ic_model_test(save_quant2_model_resnet50 ${QUANT2_RESNET50_MODEL_DIR}/ResNet50_qat_perf/float ${QUANT2_INT8_RESNET50_SAVE_PATH})

     set(QUANT2_INT8_ERNIE_SAVE_PATH "${QUANT_INSTALL_DIR}/Ernie_quant2_int8")
-    set(QUANT2_FP32_ERNIE_SAVE_PATH "${QUANT_INSTALL_DIR}/Ernie_quant2_fp32")
-    save_quant_nlp_model_test(save_quant2_model_ernie ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float ${QUANT2_FP32_ERNIE_SAVE_PATH} ${QUANT2_INT8_ERNIE_SAVE_PATH} ${QUANT2_NLP_OPS_TO_QUANTIZE})
+    save_quant_nlp_model_test(save_quant2_model_ernie ${QUANT2_ERNIE_MODEL_DIR}/Ernie_qat/float ${QUANT2_INT8_ERNIE_SAVE_PATH} ${QUANT2_ERNIE_OPS_TO_QUANTIZE})
+
+    set(QUANT2_INT8_GRU_SAVE_PATH "${QUANT_INSTALL_DIR}/GRU_quant2_int8")
+    save_quant_nlp_model_test(save_quant2_model_gru ${QUANT2_GRU_MODEL_DIR}/GRU_quant_acc ${QUANT2_INT8_GRU_SAVE_PATH} ${QUANT2_GRU_OPS_TO_QUANTIZE})

     # Convert Quant2 model to dot and pdf files
     set(QUANT2_INT8_ERNIE_DOT_SAVE_PATH "${QUANT_INSTALL_DIR}/Ernie_quant2_int8_dot_file")
python/paddle/fluid/tests/unittests/mkldnn/test_fusion_gru_int8_mkldnn_op.py

@@ -45,9 +45,10 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
         # Input data
         x_f32 = np.random.rand(T, self.IC).astype('float32') * 2 - 1
-        scale_data = 63
-        shift_data = 64
-        x_u8 = (x_f32 * scale_data + shift_data).astype(np.uint8)
+        scale_data = 63.0
+        shift_data = 64.0
+        x_u8 = np.rint(x_f32 * scale_data + shift_data).astype(np.uint8)
+        # x_u8 = (x_f32 * scale_data + shift_data).astype(np.uint8)

         # WeightX/WeightH data
         wx = np.random.rand(self.IC, 3 * self.OC).astype('float32') * 2 - 1
@@ -58,7 +59,8 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
         # WeightX data shape in PP: [IC, 3 * OC]
         # WeightH data shape in PP: [OC, 2 * OC] + [OC, OC]
         # Scales shape in oneDNN: [3, OC]
-        scale_ur = 63 / np.max(np.abs(
+        s8_max = 127.0
+        scale_ur = s8_max / np.max(np.abs(
             np.concatenate(
                 [
                     wx[:, :2 * self.OC], wh.flatten()[:2 * self.OC * self.OC]
@@ -66,7 +68,7 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
                 ],
                 axis=0)),
                                 axis=0)
-        scale_o = 63 / np.max(np.abs(
+        scale_o = s8_max / np.max(np.abs(
             np.concatenate(
                 [
                     wx[:, 2 * self.OC:], wh.flatten()[2 * self.OC * self.OC:]
@@ -102,7 +104,9 @@ class TestFusionGRUINT8MKLDNNOp(OpTest):
             self.outputs = {'Hidden': (hidden_f32, self.lod)}
         else:
             self.error_margin = 1
-            hidden_u8 = (hidden_f32 * scale_data + shift_data).astype(np.uint8)
+            hidden_u8 = np.rint(hidden_f32 * scale_data +
+                                shift_data).astype(np.uint8)
+            # hidden_u8 = (hidden_f32 * scale_data + shift_data).astype(np.uint8)

             self.outputs = {'Hidden': (hidden_u8, self.lod)}

         self.attrs = {
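Note: the switch to np.rint matters because astype(np.uint8) alone truncates toward zero, which can land one unit away from what a round-to-nearest quantize kernel produces; np.rint rounds to nearest (ties to even). A quick illustration with arbitrary values:

    import numpy as np

    scale_data, shift_data = 63.0, 64.0
    x = np.array([-0.99, -0.2, 0.2, 0.99], dtype=np.float32)

    trunc = (x * scale_data + shift_data).astype(np.uint8)
    rounded = np.rint(x * scale_data + shift_data).astype(np.uint8)
    print(trunc)    # [  1  51  76 126]
    print(rounded)  # [  2  51  77 126]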