Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
36abeff4
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
36abeff4
编写于
6月 30, 2020
作者:
S
Sylwester Fraczek
提交者:
GitHub
6月 30, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
adding elementwiseadd quantization (#25178)
上级
87a4a7ec
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
222 addition
and
14 deletion
+222
-14
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
+109
-7
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
+2
-0
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
+107
-7
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
+4
-0
未找到文件。
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc
浏览文件 @
36abeff4
...
@@ -37,10 +37,11 @@ void UnlinkNodes(ir::Node* a, ir::Node* b) {
...
@@ -37,10 +37,11 @@ void UnlinkNodes(ir::Node* a, ir::Node* b) {
b
->
inputs
.
end
());
b
->
inputs
.
end
());
}
}
void
LogCannotQuantizeOp
(
Node
*
op
)
{
void
LogCannotQuantizeOp
(
Node
*
op
,
const
char
*
details
=
nullptr
)
{
std
::
stringstream
msg_ss
;
std
::
stringstream
msg_ss
;
msg_ss
<<
"Cannot quantize operator "
<<
op
->
Name
()
msg_ss
<<
"Cannot quantize operator "
<<
op
->
Name
()
<<
" (type: "
<<
op
->
Op
()
->
Type
()
<<
", id: "
<<
op
->
id
()
<<
")."
;
<<
" (type: "
<<
op
->
Op
()
->
Type
()
<<
", id: "
<<
op
->
id
()
<<
")."
;
if
(
details
)
msg_ss
<<
" "
<<
details
;
PrettyLogDetail
(
msg_ss
.
str
().
c_str
());
PrettyLogDetail
(
msg_ss
.
str
().
c_str
());
}
}
...
@@ -51,6 +52,13 @@ void LogScaleIsMissingForVar(Node* var) {
...
@@ -51,6 +52,13 @@ void LogScaleIsMissingForVar(Node* var) {
PrettyLogDetail
(
msg_ss
.
str
().
c_str
());
PrettyLogDetail
(
msg_ss
.
str
().
c_str
());
}
}
// Writes a VLOG(4) message stating that quantization was skipped for `op`
// because its "use_quantizer" attribute is false. The message carries the
// operator's name, its op type and its node id.
//
// `op` must be a non-null operator node: both op->Name() and op->Op() are
// dereferenced without a check.
void LogQuantizationDisabled(Node* op) {
  VLOG(4) << "Quantization skipped for operator " << op->Name()
          << " (type: " << op->Op()->Type() << ", id: " << op->id()
          << "). Attribute use_quantizer = false.";
}
}
// namespace
}
// namespace
enum
{
U8_MAX
=
255
,
S8_MAX
=
127
};
enum
{
U8_MAX
=
255
,
S8_MAX
=
127
};
...
@@ -239,7 +247,10 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
...
@@ -239,7 +247,10 @@ void CPUQuantizePass::QuantizeConv(Graph* graph,
auto
*
conv_op_desc
=
conv_op
->
Op
();
auto
*
conv_op_desc
=
conv_op
->
Op
();
// skip if should not be quantized
// skip if should not be quantized
if
(
!
conv_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
return
;
if
(
!
conv_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
LogQuantizationDisabled
(
conv_op
);
return
;
}
GET_IR_NODE_FROM_SUBGRAPH
(
conv_filter
,
conv_filter
,
conv_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
conv_filter
,
conv_filter
,
conv_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
conv_input
,
conv_input
,
conv_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
conv_input
,
conv_input
,
conv_pattern
);
...
@@ -333,9 +344,13 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
...
@@ -333,9 +344,13 @@ void CPUQuantizePass::QuantizeFc(Graph* graph) const {
auto
*
fc_op_desc
=
fc
->
Op
();
auto
*
fc_op_desc
=
fc
->
Op
();
// skip if should not be quantized
// skip if should not be quantized
if
(
fc_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
)
!=
true
||
if
(
!
fc_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
fc_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_mkldnn"
)
!=
true
)
LogQuantizationDisabled
(
fc
);
return
;
}
if
(
!
fc_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_mkldnn"
))
{
return
;
return
;
}
GET_IR_NODE_FROM_SUBGRAPH
(
weights
,
weights
,
fc_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
weights
,
weights
,
fc_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
input
,
input
,
fc_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
input
,
input
,
fc_pattern
);
...
@@ -396,7 +411,10 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
...
@@ -396,7 +411,10 @@ void CPUQuantizePass::QuantizePool(Graph* graph) const {
auto
*
pool_op_desc
=
pool_op
->
Op
();
auto
*
pool_op_desc
=
pool_op
->
Op
();
// skip if should not be quantized
// skip if should not be quantized
if
(
!
pool_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
return
;
if
(
!
pool_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
LogQuantizationDisabled
(
pool_op
);
return
;
}
GET_IR_NODE_FROM_SUBGRAPH
(
pool_input
,
pool_input
,
pool_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
pool_input
,
pool_input
,
pool_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
pool_output
,
pool_output
,
pool_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
pool_output
,
pool_output
,
pool_pattern
);
...
@@ -438,7 +456,10 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
...
@@ -438,7 +456,10 @@ void CPUQuantizePass::QuantizeConcat(Graph* graph) const {
auto
*
concat_op_desc
=
concat_op
->
Op
();
auto
*
concat_op_desc
=
concat_op
->
Op
();
// skip if should not be quantized
// skip if should not be quantized
if
(
!
concat_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
return
;
if
(
!
concat_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
LogQuantizationDisabled
(
concat_op
);
return
;
}
GET_IR_NODE_FROM_SUBGRAPH
(
concat_out
,
concat_out
,
concat_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
concat_out
,
concat_out
,
concat_pattern
);
...
@@ -481,7 +502,10 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
...
@@ -481,7 +502,10 @@ void CPUQuantizePass::QuantizePriorBox(Graph* graph) const {
auto
*
prior_box_op_desc
=
prior_box_op
->
Op
();
auto
*
prior_box_op_desc
=
prior_box_op
->
Op
();
// skip if should not be quantized
// skip if should not be quantized
if
(
!
prior_box_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
return
;
if
(
!
prior_box_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
LogQuantizationDisabled
(
prior_box_op
);
return
;
}
GET_IR_NODE_FROM_SUBGRAPH
(
prior_box_input
,
prior_box_input
,
GET_IR_NODE_FROM_SUBGRAPH
(
prior_box_input
,
prior_box_input
,
prior_box_pattern
);
prior_box_pattern
);
...
@@ -522,6 +546,7 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
...
@@ -522,6 +546,7 @@ void CPUQuantizePass::QuantizeTranspose(Graph* graph) const {
// skip if should not be quantized
// skip if should not be quantized
if
(
!
transpose_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
if
(
!
transpose_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
LogQuantizationDisabled
(
transpose_op
);
return
;
return
;
}
}
GET_IR_NODE_FROM_SUBGRAPH
(
prev_op
,
prev_op
,
transpose_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
prev_op
,
prev_op
,
transpose_pattern
);
...
@@ -576,6 +601,7 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
...
@@ -576,6 +601,7 @@ void CPUQuantizePass::QuantizeReshape(Graph* graph) const {
// skip if should not be quantized
// skip if should not be quantized
if
(
!
reshape_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
if
(
!
reshape_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
LogQuantizationDisabled
(
reshape_op
);
return
;
return
;
}
}
GET_IR_NODE_FROM_SUBGRAPH
(
prev_op
,
prev_op
,
reshape_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
prev_op
,
prev_op
,
reshape_pattern
);
...
@@ -628,6 +654,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
...
@@ -628,6 +654,7 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
// skip if should not be quantized
// skip if should not be quantized
if
(
!
matmul_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
if
(
!
matmul_op_desc
->
GetAttrIfExists
<
bool
>
(
"use_quantizer"
))
{
LogQuantizationDisabled
(
matmul_op
);
return
;
return
;
}
}
GET_IR_NODE_FROM_SUBGRAPH
(
prev_op_x
,
prev_op_x
,
matmul_pattern
);
GET_IR_NODE_FROM_SUBGRAPH
(
prev_op_x
,
prev_op_x
,
matmul_pattern
);
...
@@ -676,6 +703,80 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
...
@@ -676,6 +703,80 @@ void CPUQuantizePass::QuantizeMatmul(Graph* graph) const {
PrettyLogDetail
(
"--- quantized %d matmul ops"
,
quantize_matmul_count
);
PrettyLogDetail
(
"--- quantized %d matmul ops"
,
quantize_matmul_count
);
}
}
// Quantizes the elementwise_add operators matched in `graph`.
//
// For each match the handler:
//   * leaves the op untouched when "use_quantizer" is false, when a scale is
//     missing for X or Y, or when X and Y differ in signedness;
//   * otherwise quantizes inputs X and Y (recording "Scale_x" / "Scale_y");
//   * dequantizes the output (recording "Scale_out") when an output scale is
//     available, or sets "force_fp32_output" on the op when it is not.
// The number of quantized ops is passed to AddStatis and logged.
void CPUQuantizePass::QuantizeElementwiseAdd(Graph* graph) const {
  GraphPatternDetector detector;
  auto root_pattern = detector.mutable_pattern();
  patterns::ElementwiseAdd add_pattern{root_pattern, name_scope_};

  add_pattern(root_pattern->NewNode(add_pattern.elementwise_add_x_repr()),
              root_pattern->NewNode(add_pattern.elementwise_add_y_repr()));

  int quantized_ops = 0;
  // NOTE: the parameter must stay named `subgraph`; it is kept as in the
  // original because GET_IR_NODE_FROM_SUBGRAPH is expanded inside this body.
  auto quantize_match = [&](const GraphPatternDetector::subgraph_t& subgraph,
                            Graph* g) {
    VLOG(4) << "Quantize elementwise_add op";
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_op, elementwise_add_op,
                              add_pattern);
    auto* op_desc = elementwise_add_op->Op();

    // Honour the per-op opt-out before touching anything else.
    if (!op_desc->GetAttrIfExists<bool>("use_quantizer")) {
      LogQuantizationDisabled(elementwise_add_op);
      return;
    }

    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_x, elementwise_add_x,
                              add_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_y, elementwise_add_y,
                              add_pattern);
    GET_IR_NODE_FROM_SUBGRAPH(elementwise_add_out, elementwise_add_out,
                              add_pattern);

    // Both inputs need a scale; without them the op cannot be quantized.
    if (!AreScalesPresentForNodes(elementwise_add_op,
                                  {elementwise_add_x, elementwise_add_y})) {
      LogCannotQuantizeOp(elementwise_add_op);
      return;
    }

    bool x_is_unsigned = false;
    bool y_is_unsigned = false;
    auto x_scale = GetScaleValueForNode(elementwise_add_x, &x_is_unsigned);
    auto y_scale = GetScaleValueForNode(elementwise_add_y, &y_is_unsigned);

    // TODO(sfraczek): add support for different signedness
    if (x_is_unsigned != y_is_unsigned) {
      LogCannotQuantizeOp(elementwise_add_op,
                          "ElementwiseAdd inputs must be of the same type.");
      return;
    }

    QuantizeInput(g, elementwise_add_op, elementwise_add_x, "X", x_scale,
                  x_is_unsigned, "Scale_x");
    QuantizeInput(g, elementwise_add_op, elementwise_add_y, "Y", y_scale,
                  y_is_unsigned, "Scale_y");

    // Without a scale for the output tensor the op has to emit fp32 data.
    if (!AreScalesPresentForNodes(elementwise_add_op, {elementwise_add_out})) {
      op_desc->SetAttr("force_fp32_output", true);
    } else {
      bool out_is_unsigned = false;
      auto out_scale =
          GetScaleValueForNode(elementwise_add_out, &out_is_unsigned);
      DequantizeOutput(g, elementwise_add_op, elementwise_add_out, "Out",
                       out_scale, out_is_unsigned, "Scale_out");
    }

    ++quantized_ops;
  };

  detector(graph, quantize_match);
  AddStatis(quantized_ops);

  PrettyLogDetail("--- quantized %d elementwise_add ops", quantized_ops);
}
void
CPUQuantizePass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
void
CPUQuantizePass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
VLOG
(
3
)
<<
"Quantizing the graph."
;
VLOG
(
3
)
<<
"Quantizing the graph."
;
PADDLE_ENFORCE
(
graph
);
PADDLE_ENFORCE
(
graph
);
...
@@ -692,6 +793,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
...
@@ -692,6 +793,7 @@ void CPUQuantizePass::ApplyImpl(ir::Graph* graph) const {
QuantizeFc
(
graph
);
QuantizeFc
(
graph
);
QuantizeReshape
(
graph
);
QuantizeReshape
(
graph
);
QuantizeMatmul
(
graph
);
QuantizeMatmul
(
graph
);
QuantizeElementwiseAdd
(
graph
);
}
}
}
// namespace ir
}
// namespace ir
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h
浏览文件 @
36abeff4
...
@@ -60,6 +60,8 @@ class CPUQuantizePass : public FusePassBase {
...
@@ -60,6 +60,8 @@ class CPUQuantizePass : public FusePassBase {
void
QuantizeMatmul
(
Graph
*
graph
)
const
;
void
QuantizeMatmul
(
Graph
*
graph
)
const
;
void
QuantizeElementwiseAdd
(
Graph
*
graph
)
const
;
void
QuantizeInput
(
Graph
*
g
,
Node
*
op
,
Node
*
input
,
std
::
string
input_name
,
void
QuantizeInput
(
Graph
*
g
,
Node
*
op
,
Node
*
input
,
std
::
string
input_name
,
double
scale_to_one
,
bool
is_unsigned
,
double
scale_to_one
,
bool
is_unsigned
,
std
::
string
scale_attr_name
=
""
)
const
;
std
::
string
scale_attr_name
=
""
)
const
;
...
...
paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc
浏览文件 @
36abeff4
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"
#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h"
#include <gtest/gtest.h>
#include <gtest/gtest.h>
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/place.h"
...
@@ -82,6 +83,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
...
@@ -82,6 +83,14 @@ void SetOp(ProgramDesc* prog, const std::string& type, const std::string& name,
op
->
SetAttr
(
"Scale_x"
,
1.0
f
);
op
->
SetAttr
(
"Scale_x"
,
1.0
f
);
op
->
SetAttr
(
"Scale_y"
,
1.0
f
);
op
->
SetAttr
(
"Scale_y"
,
1.0
f
);
op
->
SetAttr
(
"Scale_out"
,
1.0
f
);
op
->
SetAttr
(
"Scale_out"
,
1.0
f
);
}
else
if
(
type
==
"elementwise_add"
)
{
op
->
SetInput
(
"X"
,
{
inputs
[
0
]});
if
(
inputs
.
size
()
>
1
)
op
->
SetInput
(
"Y"
,
{
inputs
[
1
]});
op
->
SetOutput
(
"Out"
,
{
outputs
[
0
]});
op
->
SetAttr
(
"use_quantizer"
,
use_quantizer
);
op
->
SetAttr
(
"Scale_x"
,
1.0
f
);
op
->
SetAttr
(
"Scale_y"
,
1.0
f
);
op
->
SetAttr
(
"Scale_out"
,
1.0
f
);
}
}
}
}
...
@@ -95,7 +104,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
...
@@ -95,7 +104,8 @@ void InitTensorHolder(Scope* scope, const paddle::platform::Place& place,
void
PreparePass
(
std
::
unique_ptr
<
ir
::
Graph
>*
graph
,
const
ProgramDesc
&
prog
,
void
PreparePass
(
std
::
unique_ptr
<
ir
::
Graph
>*
graph
,
const
ProgramDesc
&
prog
,
const
std
::
initializer_list
<
std
::
string
>
variable_names
,
const
std
::
initializer_list
<
std
::
string
>
variable_names
,
int
*
original_nodes_num
,
int
*
current_nodes_num
,
int
*
original_nodes_num
,
int
*
current_nodes_num
,
std
::
string
var_without_scale
=
""
)
{
std
::
string
var_without_scale
=
""
,
std
::
string
var_signed
=
""
)
{
auto
place
=
paddle
::
platform
::
CPUPlace
();
auto
place
=
paddle
::
platform
::
CPUPlace
();
NaiveExecutor
exe
{
place
};
NaiveExecutor
exe
{
place
};
Scope
scope
;
Scope
scope
;
...
@@ -108,8 +118,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
...
@@ -108,8 +118,7 @@ void PreparePass(std::unique_ptr<ir::Graph>* graph, const ProgramDesc& prog,
tensor
.
Resize
({
1
});
tensor
.
Resize
({
1
});
auto
*
ptr
=
tensor
.
mutable_data
<
double
>
(
place
);
auto
*
ptr
=
tensor
.
mutable_data
<
double
>
(
place
);
ptr
[
0
]
=
2.0
;
ptr
[
0
]
=
2.0
;
(
*
scales
)[
v
]
=
std
::
make_pair
(
v
==
var_signed
,
std
::
move
(
tensor
));
(
*
scales
)[
v
]
=
std
::
make_pair
(
false
,
std
::
move
(
tensor
));
}
}
(
*
graph
)
->
SetNotOwned
(
kParamScopeAttr
,
&
scope
);
(
*
graph
)
->
SetNotOwned
(
kParamScopeAttr
,
&
scope
);
...
@@ -387,7 +396,7 @@ static const std::initializer_list<std::string> variable_names_reshape = {
...
@@ -387,7 +396,7 @@ static const std::initializer_list<std::string> variable_names_reshape = {
// c->Dropout->d
// c->Dropout->d
ProgramDesc
BuildProgramDescReshape
()
{
ProgramDesc
BuildProgramDescReshape
()
{
ProgramDesc
prog
;
ProgramDesc
prog
;
for
(
auto
&
v
:
variable_names_
transpos
e
)
{
for
(
auto
&
v
:
variable_names_
reshap
e
)
{
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
}
}
SetOp
(
&
prog
,
"dequantize"
,
"Dequantize1"
,
{
"a"
},
{
"b"
},
true
);
SetOp
(
&
prog
,
"dequantize"
,
"Dequantize1"
,
{
"a"
},
{
"b"
},
true
);
...
@@ -402,7 +411,7 @@ ProgramDesc BuildProgramDescReshape() {
...
@@ -402,7 +411,7 @@ ProgramDesc BuildProgramDescReshape() {
// c->Dropout->d
// c->Dropout->d
ProgramDesc
BuildProgramDescReshapeBetweenNonQuantizedOp
()
{
ProgramDesc
BuildProgramDescReshapeBetweenNonQuantizedOp
()
{
ProgramDesc
prog
;
ProgramDesc
prog
;
for
(
auto
&
v
:
variable_names_
transpos
e
)
{
for
(
auto
&
v
:
variable_names_
reshap
e
)
{
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
}
}
...
@@ -491,7 +500,7 @@ static const std::initializer_list<std::string> variable_names_matmul = {
...
@@ -491,7 +500,7 @@ static const std::initializer_list<std::string> variable_names_matmul = {
ProgramDesc
BuildProgramDescMatmul
()
{
ProgramDesc
BuildProgramDescMatmul
()
{
ProgramDesc
prog
;
ProgramDesc
prog
;
for
(
auto
&
v
:
variable_names_
transpose
)
{
for
(
auto
&
v
:
variable_names_
matmul
)
{
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
}
}
SetOp
(
&
prog
,
"dequantize"
,
"Dequantize1"
,
{
"a"
},
{
"b"
},
true
);
SetOp
(
&
prog
,
"dequantize"
,
"Dequantize1"
,
{
"a"
},
{
"b"
},
true
);
...
@@ -504,7 +513,7 @@ ProgramDesc BuildProgramDescMatmul() {
...
@@ -504,7 +513,7 @@ ProgramDesc BuildProgramDescMatmul() {
ProgramDesc
BuildProgramDescMatmulNotQuantized
()
{
ProgramDesc
BuildProgramDescMatmulNotQuantized
()
{
ProgramDesc
prog
;
ProgramDesc
prog
;
for
(
auto
&
v
:
variable_names_
transpose
)
{
for
(
auto
&
v
:
variable_names_
matmul
)
{
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
prog
.
MutableBlock
(
0
)
->
Var
(
v
);
}
}
SetOp
(
&
prog
,
"dropout"
,
"Dropout"
,
{
"a"
},
{
"b"
},
false
);
SetOp
(
&
prog
,
"dropout"
,
"Dropout"
,
{
"a"
},
{
"b"
},
false
);
...
@@ -569,6 +578,97 @@ TEST(CpuQuantizePass, matmul_not_quantized) {
...
@@ -569,6 +578,97 @@ TEST(CpuQuantizePass, matmul_not_quantized) {
MainTestMatmul
(
BuildProgramDescMatmulNotQuantized
(),
matmul_count
,
MainTestMatmul
(
BuildProgramDescMatmulNotQuantized
(),
matmul_count
,
quant_count
,
dequant_count
,
added_nodes_count
,
1.0
f
);
quant_count
,
dequant_count
,
added_nodes_count
,
1.0
f
);
}
}
// Names of the variables used by the elementwise_add test program. Each name
// is declared in block 0 by BuildProgramDescElementwiseAdd, and the same list
// is handed to PreparePass by MainTestElementwiseAdd.
static const std::initializer_list<std::string> variable_names_elementwise_add =
    {"a", "b", "c", "d", "e", "f"};
// Builds the program used by the elementwise_add quantization tests:
//
//   a -> Dequantize1 -> b
//   c -> Dequantize2 -> d
//   (b, d) -> ElementwiseAdd -> e
//   e -> Dropout -> f
//
// The trailing boolean arguments are forwarded to SetOp unchanged
// (presumably use_mkldnn / use_quantizer -- confirm against SetOp).
ProgramDesc BuildProgramDescElementwiseAdd() {
  ProgramDesc program;

  // Declare every variable of the test graph in the main block.
  auto main_block = program.MutableBlock(0);
  for (const auto& var_name : variable_names_elementwise_add) {
    main_block->Var(var_name);
  }

  SetOp(&program, "dequantize", "Dequantize1", {"a"}, {"b"}, true);
  SetOp(&program, "dequantize", "Dequantize2", {"c"}, {"d"}, true);
  SetOp(&program, "elementwise_add", "ElementwiseAdd", {"b", "d"}, {"e"}, true,
        true);
  SetOp(&program, "dropout", "Dropout", {"e"}, {"f"}, true, false);

  return program;
}
// Shared driver for the elementwise_add quantization tests.
//
// Builds a graph from `prog`, hands it to PreparePass (defined elsewhere in
// this file; presumably it sets up the scales and applies the pass -- confirm
// there), then checks:
//   * the number of elementwise_add / quantize / dequantize op nodes;
//   * the "Scale_x", "Scale_y" and "Scale_out" attributes of every
//     elementwise_add op;
//   * that the node count grew by exactly `added_nodes_count`.
//
// Parameters:
//   prog                       program to build the graph from.
//   elementwise_add_count      expected number of elementwise_add ops.
//   quant_count                expected number of quantize ops.
//   dequant_count              expected number of dequantize ops.
//   added_nodes_count          expected growth of the graph's node count.
//   scale                      expected scale attribute value when the op
//                              is fully quantized.
//   output_scale_missing       passes "e" to PreparePass as the variable
//                              without a scale; "Scale_out" is then expected
//                              to be 1.0f.
//   unsigned_and_signed_input  passes "b" to PreparePass as `var_signed`;
//                              all three scale attributes are then expected
//                              to be 1.0f.
void MainTestElementwiseAdd(const ProgramDesc& prog, int elementwise_add_count,
                            int quant_count, int dequant_count,
                            int added_nodes_count, float scale,
                            bool output_scale_missing = false,
                            bool unsigned_and_signed_input = false) {
  std::unique_ptr<ir::Graph> graph(new ir::Graph(prog));
  // Zero-initialised so a failure inside PreparePass cannot leave the final
  // comparison reading indeterminate values.
  int original_nodes_num = 0;
  int current_nodes_num = 0;
  PreparePass(&graph, prog, variable_names_elementwise_add,
              &original_nodes_num, &current_nodes_num,
              output_scale_missing ? "e" : "",
              unsigned_and_signed_input ? "b" : "");

  // Expected values are computed once, before the loop. The previous code
  // overwrote the `scale` parameter while iterating, so a second
  // elementwise_add node would have been compared against a value already
  // modified for the first one.
  const float expected_input_scale = unsigned_and_signed_input ? 1.0f : scale;
  const float expected_output_scale =
      output_scale_missing ? 1.0f : expected_input_scale;

  int quantize_nodes_count = 0;
  int dequantize_nodes_count = 0;
  int elementwise_add_nodes_count = 0;
  for (auto* node : graph->Nodes()) {
    if (!node->IsOp()) continue;

    auto* op = node->Op();
    if (op->Type() == "elementwise_add") {
      elementwise_add_nodes_count++;
      auto op_name = BOOST_GET_CONST(std::string, op->GetAttr("name"));
      EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_x")),
                expected_input_scale)
          << "Scale_x for node '" + op_name + "'.";
      EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_y")),
                expected_input_scale)
          << "Scale_y for node '" + op_name + "'.";
      EXPECT_EQ(BOOST_GET_CONST(float, op->GetAttr("Scale_out")),
                expected_output_scale)
          << "Scale_out for node '" + op_name + "'.";
    } else if (op->Type() == "quantize") {
      quantize_nodes_count++;
    } else if (op->Type() == "dequantize") {
      dequantize_nodes_count++;
    }
  }

  EXPECT_EQ(elementwise_add_nodes_count, elementwise_add_count);
  EXPECT_EQ(quantize_nodes_count, quant_count);
  EXPECT_EQ(dequantize_nodes_count, dequant_count);
  EXPECT_EQ(original_nodes_num + added_nodes_count, current_nodes_num);
}
// Expects the single elementwise_add op to be quantized on both inputs and
// dequantized on its output, with all scale attributes equal to 2 * 127.
TEST(CpuQuantizePass, elementwise_add) {
  const int expected_elementwise_adds = 1;
  const int expected_quantizes = 2;
  const int expected_dequantizes = 3;
  // Added nodes: 2 Quant + 2 IN + 1 DeQuant + 1 OUT
  const int expected_added_nodes = 6;

  MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
                         expected_elementwise_adds, expected_quantizes,
                         expected_dequantizes, expected_added_nodes,
                         2.0f * 127);
}
// Runs the driver with output_scale_missing = true: both inputs are expected
// to be quantized while no dequantize op is added for the output.
TEST(CpuQuantizePass, elementwise_add_output_scale_missing) {
  const int expected_elementwise_adds = 1;
  const int expected_quantizes = 2;
  const int expected_dequantizes = 2;
  // Added nodes: 2 Quant + 2 IN
  const int expected_added_nodes = 4;
  const bool output_scale_missing = true;

  MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
                         expected_elementwise_adds, expected_quantizes,
                         expected_dequantizes, expected_added_nodes,
                         2.0f * 127, output_scale_missing);
}
// Runs the driver with unsigned_and_signed_input = true: the two inputs are
// given different signedness, so no quantize ops and no extra nodes are
// expected in the graph.
TEST(CpuQuantizePass, elementwise_add_unsigned_and_signed_input) {
  const int expected_elementwise_adds = 1;
  const int expected_quantizes = 0;
  const int expected_dequantizes = 2;
  const int expected_added_nodes = 0;
  const bool output_scale_missing = false;
  const bool unsigned_and_signed_input = true;

  MainTestElementwiseAdd(BuildProgramDescElementwiseAdd(),
                         expected_elementwise_adds, expected_quantizes,
                         expected_dequantizes, expected_added_nodes,
                         2.0f * 127, output_scale_missing,
                         unsigned_and_signed_input);
}
}
// namespace
}
// namespace
}
// namespace ir
}
// namespace ir
...
...
paddle/fluid/inference/api/mkldnn_quantizer_config.cc
浏览文件 @
36abeff4
...
@@ -49,6 +49,10 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
...
@@ -49,6 +49,10 @@ MkldnnQuantizerConfig::MkldnnQuantizerConfig() {
rules_
[
"matmul"
][
"Y"
]
=
ScaleAlgo
::
KL
;
rules_
[
"matmul"
][
"Y"
]
=
ScaleAlgo
::
KL
;
rules_
[
"matmul"
][
"Out"
]
=
ScaleAlgo
::
KL
;
rules_
[
"matmul"
][
"Out"
]
=
ScaleAlgo
::
KL
;
rules_
[
"elementwise_add"
][
"X"
]
=
ScaleAlgo
::
KL
;
rules_
[
"elementwise_add"
][
"Y"
]
=
ScaleAlgo
::
KL
;
rules_
[
"elementwise_add"
][
"Out"
]
=
ScaleAlgo
::
KL
;
// Reshape2 does not perform calculation on the data and shapes are not
// Reshape2 does not perform calculation on the data and shapes are not
// changed. Scale is calculated on input data and assign to Quantize and
// changed. Scale is calculated on input data and assign to Quantize and
// Dequantize scale.
// Dequantize scale.
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录