Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
f836c8aa
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f836c8aa
编写于
3月 28, 2020
作者:
W
Wojciech Uss
提交者:
GitHub
3月 28, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add check for scales and a message (#23119)
上级
8bfd62ff
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
113 addition
and
53 deletion
+113
-53
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+55
-50
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
+6
-1
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+39
-1
python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py
...uid/contrib/slim/quantization/quantization_mkldnn_pass.py
+13
-1
未找到文件。
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
浏览文件 @
f836c8aa
...
...
@@ -85,7 +85,7 @@ void CPUQuantizePass::QuantizeInput(Graph* g, Node* op, Node* input,
}
void
CPUQuantizePass
::
QuantizeInputs
(
Graph
*
g
,
Node
*
op
,
std
::
string
input_name
,
VarQuantScale
*
scales
,
bool
are_unsigned
,
bool
are_unsigned
,
std
::
string
scale_attr_name
)
const
{
auto
inputs
=
op
->
inputs
;
auto
output
=
op
->
outputs
[
0
];
...
...
@@ -99,7 +99,7 @@ void CPUQuantizePass::QuantizeInputs(Graph* g, Node* op, std::string input_name,
std
::
vector
<
Node
*>
quantize_out_nodes
(
inputs
.
size
());
std
::
vector
<
std
::
string
>
quantize_out_node_names
(
inputs
.
size
());
double
scale_out
=
(
*
scales
)[
output
->
Name
()].
second
.
data
<
double
>
()[
0
]
;
double
scale_out
=
GetScaleValueForNode
(
output
)
;
unsigned
max
=
are_unsigned
?
U8_MAX
:
S8_MAX
;
float
scale
=
scale_out
*
max
;
...
...
@@ -169,6 +169,27 @@ void CPUQuantizePass::DequantizeOutput(Graph* g, Node* op, Node* output,
if
(
!
scale_attr_name
.
empty
())
op
->
Op
()
->
SetAttr
(
scale_attr_name
,
scale
);
}
// Fetches the quantization scale entry recorded for `node`.
//
// The entry is looked up by the node's name in the "quant_var_scales"
// attribute of the pass. If no entry exists for that name, an InvalidArgument
// error naming the variable is raised through PADDLE_ENFORCE_EQ.
//
// Returns the stored pair: `first` is the flag that callers read as
// "is unsigned", `second` is the tensor holding the scale value(s).
std::pair<bool, LoDTensor> CPUQuantizePass::GetScaleDataForNode(
    const Node* node) const {
  const auto& scales = Get<VarQuantScale>("quant_var_scales");
  // The operands of the check are kept exactly as written, in case the
  // enforce macro includes the expression text in its error message.
  PADDLE_ENFORCE_EQ(
      scales.count(node->Name()), 1,
      platform::errors::InvalidArgument(
          "Quantization scale for the variable %s is missing.", node->Name()));
  // The entry is known to exist at this point, so a checked read-only lookup
  // yields the same element that operator[] would.
  return scales.at(node->Name());
}
// Returns only the scale tensor (the `second` member of the scale entry)
// recorded for `node`. The lookup is delegated to GetScaleDataForNode.
LoDTensor CPUQuantizePass::GetScaleTensorForNode(const Node* node) const {
  auto scale_entry = GetScaleDataForNode(node);
  // Hand the tensor out of the local entry without an extra copy, matching
  // the member access on a temporary used previously.
  return std::move(scale_entry.second);
}
// Returns the first double stored in the scale tensor recorded for `node`.
//
// node:        variable node whose scale entry is looked up via
//              GetScaleDataForNode.
// is_unsigned: optional output; when non-null it receives the boolean flag
//              (`first`) of the scale entry.
double CPUQuantizePass::GetScaleValueForNode(const Node* node,
                                             bool* is_unsigned) const {
  auto entry = GetScaleDataForNode(node);
  if (is_unsigned != nullptr) {
    *is_unsigned = entry.first;
  }
  // Only element 0 of the scale tensor is read here.
  const double* scale_values = entry.second.data<double>();
  return *scale_values;
}
void
CPUQuantizePass
::
QuantizeConv
(
Graph
*
graph
,
bool
with_residual_data
)
const
{
GraphPatternDetector
gpd
;
...
...
@@ -190,15 +211,12 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
GET_IR_NODE_FROM_SUBGRAPH
(
conv_input
,
conv_input
,
conv_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
conv_output
,
conv_output
,
conv_pattern
);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto
scales
=
Get
<
VarQuantScale
>
(
"quant_var_scales"
);
auto
input_scale
=
scales
[
conv_input
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_input_unsigned
=
scales
[
conv_input
->
Name
()].
first
;
bool
is_input_unsigned
{
false
};
auto
input_scale
=
GetScaleValueForNode
(
conv_input
,
&
is_input_unsigned
);
QuantizeInput
(
g
,
conv_op
,
conv_input
,
"Input"
,
input_scale
,
is_input_unsigned
,
"Scale_in"
);
auto
filter_scale_tensor
=
scales
[
conv_filter
->
Name
()].
second
;
auto
filter_scale_tensor
=
GetScaleTensorForNode
(
conv_filter
)
;
EigenVectorArrayMap
eigen_tensor
{
filter_scale_tensor
.
data
<
double
>
(),
filter_scale_tensor
.
numel
(),
1
};
eigen_tensor
*=
static_cast
<
double
>
(
S8_MAX
);
...
...
@@ -211,16 +229,16 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
if
(
with_residual_data
)
{
GET_IR_NODE_FROM_SUBGRAPH
(
conv_residual_data
,
conv_residual_data
,
conv_pattern
);
bool
is_residual_unsigned
{
false
};
auto
residual_scale
=
scales
[
conv_residual_data
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_residual_unsigned
=
scales
[
conv_residual_data
->
Name
()].
first
;
GetScaleValueForNode
(
conv_residual_data
,
&
is_residual_unsigned
);
QuantizeInput
(
g
,
conv_op
,
conv_residual_data
,
"ResidualData"
,
residual_scale
,
is_residual_unsigned
,
"Scale_in_eltwise"
);
}
auto
output_scale
=
scales
[
conv_output
->
Name
()].
second
.
data
<
double
>
()[
0
]
;
bool
is_output_unsigned
=
scales
[
conv_output
->
Name
()].
first
;
bool
is_output_unsigned
{
false
}
;
auto
output_scale
=
GetScaleValueForNode
(
conv_output
,
&
is_output_unsigned
)
;
DequantizeOutput
(
g
,
conv_op
,
conv_output
,
"Output"
,
output_scale
,
is_output_unsigned
,
"Scale_out"
);
...
...
@@ -270,15 +288,12 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH
(
input
,
input
,
fc_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
output
,
output
,
fc_pattern
);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto
scales
=
Get
<
VarQuantScale
>
(
"quant_var_scales"
);
auto
input_scale
=
scales
[
input
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_input_unsigned
=
scales
[
input
->
Name
()].
first
;
bool
is_input_unsigned
{
false
};
auto
input_scale
=
GetScaleValueForNode
(
input
,
&
is_input_unsigned
);
QuantizeInput
(
g
,
fc
,
input
,
"Input"
,
input_scale
,
is_input_unsigned
,
"Scale_in"
);
auto
weight_scale_tensor
=
scales
[
weights
->
Name
()].
second
;
auto
weight_scale_tensor
=
GetScaleTensorForNode
(
weights
)
;
EigenVectorArrayMap
eigen_tensor
{
weight_scale_tensor
.
data
<
double
>
(),
weight_scale_tensor
.
numel
(),
1
};
eigen_tensor
*=
static_cast
<
double
>
(
S8_MAX
);
...
...
@@ -288,8 +303,8 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
fc
->
Op
()
->
SetAttr
(
"Scale_weights"
,
filter_scale
);
auto
output_scale
=
scales
[
output
->
Name
()].
second
.
data
<
double
>
()[
0
]
;
bool
is_output_unsigned
=
scales
[
output
->
Name
()].
first
;
bool
is_output_unsigned
{
false
}
;
auto
output_scale
=
GetScaleValueForNode
(
output
,
&
is_output_unsigned
)
;
DequantizeOutput
(
g
,
fc
,
output
,
"Out"
,
output_scale
,
is_output_unsigned
,
"Scale_out"
);
...
...
@@ -323,15 +338,12 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH
(
pool_input
,
pool_input
,
pool_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
pool_output
,
pool_output
,
pool_pattern
);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto
scales
=
Get
<
VarQuantScale
>
(
"quant_var_scales"
);
auto
input_scale
=
scales
[
pool_input
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_input_unsigned
=
scales
[
pool_input
->
Name
()].
first
;
bool
is_input_unsigned
{
false
};
auto
input_scale
=
GetScaleValueForNode
(
pool_input
,
&
is_input_unsigned
);
QuantizeInput
(
g
,
pool_op
,
pool_input
,
"X"
,
input_scale
,
is_input_unsigned
);
auto
output_scale
=
scales
[
pool_output
->
Name
()].
second
.
data
<
double
>
()[
0
]
;
bool
is_output_unsigned
=
scales
[
pool_output
->
Name
()].
first
;
bool
is_output_unsigned
{
false
}
;
auto
output_scale
=
GetScaleValueForNode
(
pool_output
,
&
is_output_unsigned
)
;
DequantizeOutput
(
g
,
pool_op
,
pool_output
,
"Out"
,
output_scale
,
is_output_unsigned
);
...
...
@@ -362,15 +374,13 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH
(
concat_out
,
concat_out
,
concat_pattern
);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto
scales
=
Get
<
VarQuantScale
>
(
"quant_var_scales"
);
// if all inputs were unsigned, then the output was set to unsigned
// during the scale calculation step
bool
are_all_inputs_unsigned
=
scales
[
concat_out
->
Name
()].
first
;
QuantizeInputs
(
g
,
concat_op
,
"X"
,
&
scales
,
are_all_inputs_unsigned
);
bool
are_all_inputs_unsigned
{
false
};
auto
output_scale
=
GetScaleValueForNode
(
concat_out
,
&
are_all_inputs_unsigned
);
auto
output_scale
=
scales
[
concat_out
->
Name
()].
second
.
data
<
double
>
()[
0
]
;
QuantizeInputs
(
g
,
concat_op
,
"X"
,
are_all_inputs_unsigned
)
;
DequantizeOutput
(
g
,
concat_op
,
concat_out
,
"Out"
,
output_scale
,
are_all_inputs_unsigned
);
...
...
@@ -403,11 +413,9 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH
(
prior_box_input
,
prior_box_input
,
prior_box_pattern
);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto
scales
=
Get
<
VarQuantScale
>
(
"quant_var_scales"
);
auto
input_scale
=
scales
[
prior_box_input
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_input_unsigned
=
scales
[
prior_box_input
->
Name
()].
first
;
bool
is_input_unsigned
{
false
};
auto
input_scale
=
GetScaleValueForNode
(
prior_box_input
,
&
is_input_unsigned
);
QuantizeInput
(
g
,
prior_box_op
,
prior_box_input
,
"Input"
,
input_scale
,
is_input_unsigned
);
...
...
@@ -451,15 +459,14 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH
(
transpose_in
,
transpose_in
,
transpose_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
transpose_out
,
transpose_out
,
transpose_pattern
);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto
scales
=
Get
<
VarQuantScale
>
(
"quant_var_scales"
);
auto
input_scale
=
scales
[
transpose_in
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_input_unsigned
=
scales
[
transpose_in
->
Name
()].
first
;
bool
is_input_unsigned
{
false
};
auto
input_scale
=
GetScaleValueForNode
(
transpose_in
,
&
is_input_unsigned
);
QuantizeInput
(
g
,
transpose_op
,
transpose_in
,
"X"
,
input_scale
,
is_input_unsigned
);
auto
output_scale
=
scales
[
transpose_out
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_output_unsigned
=
scales
[
transpose_out
->
Name
()].
first
;
bool
is_output_unsigned
{
false
};
auto
output_scale
=
GetScaleValueForNode
(
transpose_out
,
&
is_output_unsigned
);
DequantizeOutput
(
g
,
transpose_op
,
transpose_out
,
"Out"
,
output_scale
,
is_output_unsigned
);
...
...
@@ -504,15 +511,13 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
GET_IR_NODE_FROM_SUBGRAPH
(
reshape_in
,
reshape_in
,
reshape_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
reshape_out
,
reshape_out
,
reshape_pattern
);
// get scales calculated after warmup, they scale variables to MAX=1.0
auto
scales
=
Get
<
VarQuantScale
>
(
"quant_var_scales"
);
auto
input_scale
=
scales
[
reshape_in
->
Name
()].
second
.
data
<
double
>
()[
0
];
bool
is_input_unsigned
=
scales
[
reshape_in
->
Name
()].
first
;
bool
is_input_unsigned
{
false
};
auto
input_scale
=
GetScaleValueForNode
(
reshape_in
,
&
is_input_unsigned
);
QuantizeInput
(
g
,
reshape_op
,
reshape_in
,
"X"
,
input_scale
,
is_input_unsigned
);
auto
output_scale
=
scales
[
reshape_out
->
Name
()].
second
.
data
<
double
>
()[
0
]
;
bool
is_output_unsigned
=
scales
[
reshape_out
->
Name
()].
first
;
bool
is_output_unsigned
{
false
}
;
auto
output_scale
=
GetScaleValueForNode
(
reshape_out
,
&
is_output_unsigned
)
;
DequantizeOutput
(
g
,
reshape_op
,
reshape_out
,
"Out"
,
output_scale
,
is_output_unsigned
);
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
浏览文件 @
f836c8aa
...
...
@@ -64,7 +64,7 @@ class CPUQuantizePass : public FusePassBase {
// quantize all inputs of given name with the same (minimum) scale
void
QuantizeInputs
(
Graph
*
g
,
Node
*
op
,
std
::
string
input_name
,
VarQuantScale
*
scales
,
bool
are_unsigned
,
bool
are_unsigned
,
std
::
string
scale_attr_name
=
""
)
const
;
void
DequantizeOutput
(
Graph
*
g
,
Node
*
op
,
Node
*
output
,
...
...
@@ -72,6 +72,11 @@ class CPUQuantizePass : public FusePassBase {
bool
is_unsigned
,
std
::
string
scale_attr_name
=
""
)
const
;
// Returns the scale entry stored for `node` (looked up by node name in the
// "quant_var_scales" attribute); raises an InvalidArgument error when the
// entry is missing. `first` is the unsigned flag, `second` the scale tensor.
std
::
pair
<
bool
,
LoDTensor
>
GetScaleDataForNode
(
const
Node
*
node
)
const
;
// Returns the scale tensor (`second` of the scale entry) stored for `node`.
LoDTensor
GetScaleTensorForNode
(
const
Node
*
node
)
const
;
// Returns element 0 of the scale tensor stored for `node`; when `is_unsigned`
// is non-null it is set to the unsigned flag of the same scale entry.
double
GetScaleValueForNode
(
const
Node
*
node
,
bool
*
is_unsigned
=
nullptr
)
const
;
const
std
::
string
name_scope_
{
"quantize"
};
};
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
浏览文件 @
f836c8aa
...
...
@@ -86,13 +86,15 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
void
PreparePass
(
std
::
unique_ptr
<
ir
::
Graph
>*
graph
,
const
ProgramDesc
&
prog
,
const
std
::
initializer_list
<
std
::
string
>
variable_names
,
int
*
original_nodes_num
,
int
*
current_nodes_num
)
{
int
*
original_nodes_num
,
int
*
current_nodes_num
,
std
::
string
var_without_scale
=
""
)
{
auto
place
=
paddle
::
platform
::
CPUPlace
();
NaiveExecutor
exe
{
place
};
Scope
scope
;
exe
.
CreateVariables
(
prog
,
0
,
true
,
&
scope
);
auto
*
scales
=
new
VarQuantScale
();
for
(
auto
&
v
:
variable_names
)
{
if
(
v
.
compare
(
var_without_scale
)
==
0
)
continue
;
InitTensorHolder
(
&
scope
,
place
,
v
.
c_str
());
LoDTensor
tensor
;
tensor
.
Resize
({
1
});
...
...
@@ -475,6 +477,42 @@ TEST(CpuQuantizePass, reshapeBetweenNonQuantizedOp) {
transpose_count
,
reshape_count
,
quant_count
,
dequant_count
,
added_nodes_count
,
2.0
f
*
127
);
}
void
MainTestCheckScales
(
const
ProgramDesc
&
prog
,
const
std
::
initializer_list
<
std
::
string
>
variable_names
,
const
std
::
string
&
var_without_scale
)
{
std
::
unique_ptr
<
ir
::
Graph
>
graph
(
new
ir
::
Graph
(
prog
));
std
::
stringstream
error_msg_ss
;
error_msg_ss
<<
"Quantization scale for the variable "
<<
var_without_scale
<<
" is missing."
;
bool
caught_exception
=
false
;
try
{
int
original_nodes_num
,
current_nodes_num
;
PreparePass
(
&
graph
,
prog
,
variable_names
,
&
original_nodes_num
,
&
current_nodes_num
,
var_without_scale
);
}
catch
(
paddle
::
platform
::
EnforceNotMet
&
error
)
{
caught_exception
=
true
;
std
::
string
ex_msg
=
error
.
what
();
EXPECT_NE
(
ex_msg
.
find
(
error_msg_ss
.
str
()),
std
::
string
::
npos
);
}
EXPECT_TRUE
(
caught_exception
);
}
// Builds a minimal program with one conv2d op named "Conv":
//   (a, w) -> Conv -> o
// NOTE(review): the two trailing `true` arguments are forwarded to SetOp
// unchanged; their meaning is defined by SetOp, which is not shown here.
ProgramDesc BuildProgramDescCheckScalesConv() {
  ProgramDesc program;
  SetOp(&program, "conv2d", "Conv", {"a", "w"}, {"o"}, true, true);
  return program;
}
// An exception with a proper message must be thrown when the quantization
// scale of a variable is missing. Here the scale of "a" is the one left out.
// NOTE(review): other tests in this file appear to use the suite name
// `CpuQuantizePass`; confirm whether the different casing here is intended.
TEST(CPUQuantizePass, check_scales) {
  const std::initializer_list<std::string> conv_vars = {"a", "w", "o"};
  const std::string var_missing_scale = "a";
  const ProgramDesc conv_prog = BuildProgramDescCheckScalesConv();
  MainTestCheckScales(conv_prog, conv_vars, var_missing_scale);
}
}
// namespace
}
// namespace ir
...
...
python/paddle/fluid/contrib/slim/quantization/quantization_mkldnn_pass.py
浏览文件 @
f836c8aa
...
...
@@ -423,8 +423,11 @@ class Qat2Int8MkldnnPass(object):
return
waiting_for_scale
waiting_for_scale
=
_update_scales
(
graph
)
waiting_for_scale_prev
=
set
()
while
len
(
waiting_for_scale
)
!=
0
:
while
len
(
waiting_for_scale
)
!=
0
and
waiting_for_scale
!=
waiting_for_scale_prev
:
waiting_for_scale_prev
=
waiting_for_scale
waiting_for_scale
=
_update_scales
(
graph
)
return
graph
...
...
@@ -547,7 +550,16 @@ class Qat2Int8MkldnnPass(object):
tensor
=
self
.
_scope
.
find_var
(
name
).
get_tensor
()
tensor
.
set
(
array
,
self
.
_place
)
def
_remove_ctrl_vars
(
self
,
graph
):
remove_ctr_vars
=
set
()
for
node
in
graph
.
all_var_nodes
():
if
node
.
is_ctrl_var
():
remove_ctr_vars
.
add
(
node
)
graph
.
safe_remove_nodes
(
remove_ctr_vars
)
return
graph
def
_optimize_fp32_graph
(
self
,
graph
):
graph
=
self
.
_remove_ctrl_vars
(
graph
)
graph
=
self
.
_apply_pass
(
graph
,
'mkldnn_placement_pass'
,
[
'mkldnn_enabled_op_types'
],
[
set
()])
if
self
.
_is_conv_quantized
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录