Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
c00d869b
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
c00d869b
编写于
4月 10, 2022
作者:
B
baoachun
提交者:
GitHub
4月 10, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add mkldnn compute_propagate_scales int8 pass (#41592)
上级
a78ca1cf
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
531 addition
and
0 deletion
+531
-0
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-0
paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
...amework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
+438
-0
paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h
...ramework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h
+92
-0
未找到文件。
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
c00d869b
...
@@ -141,6 +141,7 @@ if(WITH_MKLDNN)
...
@@ -141,6 +141,7 @@ if(WITH_MKLDNN)
pass_library
(
multi_gru_fuse_pass inference DIR mkldnn
)
pass_library
(
multi_gru_fuse_pass inference DIR mkldnn
)
pass_library
(
multi_gru_seq_fuse_pass inference DIR mkldnn
)
pass_library
(
multi_gru_seq_fuse_pass inference DIR mkldnn
)
pass_library
(
quant_dequant_mkldnn_pass inference DIR mkldnn
)
pass_library
(
quant_dequant_mkldnn_pass inference DIR mkldnn
)
pass_library
(
compute_propagate_scales_mkldnn_pass inference DIR mkldnn
)
endif
()
endif
()
if
(
WITH_IPU
)
if
(
WITH_IPU
)
...
...
paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc
0 → 100644
浏览文件 @
c00d869b
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <float.h>
#include <algorithm>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h"
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h"
#include "paddle/fluid/framework/op_version_registry.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Fills `tensor` with the values of `data_v`.
//
// The destination buffer is requested through mutable_data<float>() with the
// one-dimensional shape {data_v.size()} and platform::CPUPlace(); the vector
// elements are then copied into it in order.
void ComputePropagateScalesMkldnnPass::GetTensorFromVector(
    const std::vector<float>& data_v, Tensor* tensor) const {
  const int num_elements = static_cast<int>(data_v.size());
  auto* dst = tensor->mutable_data<float>({num_elements}, platform::CPUPlace());
  // copy_n is a no-op for a non-positive count, matching the original loop.
  std::copy_n(data_v.begin(), num_elements, dst);
}
// Loads the quantization scales that were previously stored on the graph.
//
// GetInfoFromTheFirstOp() is asked for the "var_quant_scales" entries guarded
// by the "has_quant_info" flag. Every (variable name, float vector) entry it
// returns is converted to a Tensor and inserted into `var_quant_scales` with
// the bool flag set to false. Names already present in `var_quant_scales` are
// left untouched because insert() does not overwrite.
void ComputePropagateScalesMkldnnPass::GetQuantInfo(
    ir::Graph* graph, StringPairMap* var_quant_scales) const {
  std::unordered_map<std::string, std::vector<float>> scales_by_var{};
  GetInfoFromTheFirstOp(graph, "has_quant_info", "var_quant_scales",
                        &scales_by_var);

  for (const auto& entry : scales_by_var) {
    Tensor scale_tensor;
    GetTensorFromVector(entry.second, &scale_tensor);
    var_quant_scales->insert(
        std::make_pair(entry.first, std::make_pair(false, scale_tensor)));
  }
}
// Computes per-channel scales of a rank-2 float tensor stored row-major.
//
// tensor: rank-2 tensor of floats (enforced below).
// axis:   must be less than 2 (enforced below). With axis == 0 one scale is
//         produced per column (maximum taken over the rows); with any other
//         accepted value one scale is produced per row (maximum taken over
//         the columns).
//
// Returns 1 / max(|x|) for every channel. When that reciprocal is not a
// finite number - in particular for a channel that contains only zeros - the
// scale is 0.
//
// Fix: the running maximum used to start at FLT_MIN, which is the smallest
// positive normal float rather than zero. An all-zero channel therefore
// produced the huge scale 1 / FLT_MIN and the inf/nan guard could never
// trigger. The maximum of absolute values now starts at 0.
std::vector<float> ComputePropagateScalesMkldnnPass::GetScales(Tensor* tensor,
                                                               int axis) const {
  PADDLE_ENFORCE_LT(axis, 2,
                    platform::errors::InvalidArgument(
                        "The input axis is required to be less than 2."));
  auto* data = tensor->data<float>();
  const auto dims = tensor->dims();
  PADDLE_ENFORCE_EQ(dims.size(), 2,
                    platform::errors::InvalidArgument(
                        "The input tensor's rank is required to be 2."));

  const int rows = dims.at(0);
  const int columns = dims.at(1);

  // Converts a channel's maximum absolute value into its scale. Division is
  // done in float so that an overflowing reciprocal becomes inf and is mapped
  // to 0 instead of being narrowed from an out-of-range double.
  const auto to_scale = [](float max_abs) {
    float scale = 1.0f / max_abs;
    if (std::isinf(scale) || std::isnan(scale)) {
      scale = 0.0f;
    }
    return scale;
  };

  std::vector<float> scales;
  if (axis == 0) {
    // One scale per column: walk down each column with a stride of `columns`.
    for (int i = 0; i < columns; i++) {
      float max_value = 0.0f;
      for (int j = 0; j < rows; j++) {
        max_value = std::max(max_value, std::abs(data[i + j * columns]));
      }
      scales.push_back(to_scale(max_value));
    }
  } else {
    // One scale per row: each row is a contiguous run of `columns` values.
    for (int i = 0; i < rows; i++) {
      float max_value = 0.0f;
      for (int j = i * columns; j < (i + 1) * columns; j++) {
        max_value = std::max(max_value, std::abs(data[j]));
      }
      scales.push_back(to_scale(max_value));
    }
  }
  return scales;
}
// Computes weight scales for every op in `graph` whose type is listed in
// `ops`.
//
// For each matching op the first variable of input slot `weight_name` is
// looked up in `scope` (it must exist). Its tensor is viewed as a 2-D matrix
// of shape {dims[0], product of the remaining dims}, the absolute values are
// written to a temporary tensor, and GetScales() is applied along `axis`.
// The result is inserted into `var_quant_scales` under the variable name with
// the bool flag set to false; an existing entry is not overwritten.
void ComputePropagateScalesMkldnnPass::ComputeVarScales(
    ir::Graph* graph, Scope* scope, const std::unordered_set<std::string>& ops,
    const std::string& weight_name, const int axis,
    StringPairMap* var_quant_scales) const {
  for (auto* op_node :
       ir::TopologyVarientSort(*graph, static_cast<ir::SortKind>(0))) {
    if (!op_node->IsOp()) continue;

    auto* op_desc = op_node->Op();
    if (ops.count(op_desc->Type()) == 0) continue;

    auto var_name = op_desc->Input(weight_name)[0];
    auto* var = scope->FindVar(var_name);
    PADDLE_ENFORCE_NOT_NULL(
        var, platform::errors::NotFound(
                 "The input persistable var [%s] of [%s] op is not found.",
                 var_name, op_desc->Type()));

    auto* weight_tensor = var->GetMutable<LoDTensor>();
    const auto dims = weight_tensor->dims();

    // Number of elements per slice along the first dimension.
    int volume = 1;
    for (int d = 1; d < dims.size(); d++) {
      volume *= dims[d];
    }

    std::vector<int64_t> reshape_dims = {dims[0], volume};
    Tensor abs_weights;
    abs_weights.Resize(phi::make_ddim(reshape_dims));

    auto* weight_data = weight_tensor->data<float>();
    auto* abs_data = abs_weights.mutable_data<float>(platform::CPUPlace());
    for (int k = 0; k < weight_tensor->numel(); k++) {
      abs_data[k] = std::abs(weight_data[k]);
    }

    auto scales_v = GetScales(&abs_weights, axis);
    Tensor scale_tensor;
    GetTensorFromVector(scales_v, &scale_tensor);

    var_quant_scales->insert(
        std::make_pair(var_name, std::make_pair(false, scale_tensor)));
  }
}
// Computes the combined WeightX/WeightH scales of a single GRU layer.
//
// scope:       scope holding both weight variables (both must exist).
// wx_var_name: name of the WeightX variable, read as a row-major matrix.
// wh_var_name: name of the WeightH variable; OC is taken from its dims()[0].
// tensor:      output; receives 2 * OC scales followed by OC scales.
//
// The first 2 * OC outputs are column-wise maxima of |w| over the first
// 2 * OC columns of WeightX and over the first 2 * OC * OC elements of
// WeightH (read as rows of 2 * OC values). The remaining OC outputs are
// column-wise maxima over the WeightX columns starting at 2 * OC and over the
// WeightH elements starting at 2 * OC * OC (read as rows of OC values). Every
// maximum is finally replaced by its reciprocal.
//
// Fix: the comparison for the second group used scale_o[col_id] with
// col_id >= 2 * OC although scale_o only has OC elements, i.e. it read out of
// bounds. It now compares against scale_o[col_id % OC], the slot that is
// actually written.
//
// NOTE(review): a column whose maximum is 0 yields 1 / 0 here, unlike
// GetScales() which maps non-finite scales to 0 - confirm this is intended.
void ComputePropagateScalesMkldnnPass::ComputeSingleGruWeightScales(
    Scope* scope, const std::string& wx_var_name,
    const std::string& wh_var_name, Tensor* tensor) const {
  auto* wx_var = scope->FindVar(wx_var_name);
  PADDLE_ENFORCE_NOT_NULL(
      wx_var, platform::errors::NotFound(
                  "The input persistable var [%s] is not found.", wx_var_name));
  auto* wh_var = scope->FindVar(wh_var_name);
  PADDLE_ENFORCE_NOT_NULL(
      wh_var, platform::errors::NotFound(
                  "The input persistable var [%s] is not found.", wh_var_name));

  const auto* wx_tensor = wx_var->GetMutable<LoDTensor>();
  const auto* wh_tensor = wh_var->GetMutable<LoDTensor>();
  const int OC = wh_tensor->dims()[0];
  std::vector<float> scale_ur(2 * OC);
  std::vector<float> scale_o(OC);

  // First 2 * OC columns of WeightX: row 0 seeds the maxima.
  for (int row_id = 0; row_id < wx_tensor->dims()[0]; row_id++) {
    for (int col_id = 0; col_id < 2 * OC; col_id++) {
      int idx = (row_id * wx_tensor->dims()[1]) + col_id;
      auto abs_value = std::abs(wx_tensor->data<float>()[idx]);
      if (row_id == 0 || abs_value > scale_ur[col_id]) {
        scale_ur[col_id] = abs_value;
      }
    }
  }

  // First 2 * OC * OC elements of WeightH, folded into the same maxima.
  for (int i = 0; i < 2 * OC * OC; i++) {
    int col_id = i % (2 * OC);
    auto abs_value = std::abs(wh_tensor->data<float>()[i]);
    if (abs_value > scale_ur[col_id]) scale_ur[col_id] = abs_value;
  }

  // Remaining columns of WeightX; col_id % OC maps them onto scale_o.
  for (int row_id = 0; row_id < wx_tensor->dims()[0]; row_id++) {
    for (int col_id = 2 * OC; col_id < wx_tensor->dims()[1]; col_id++) {
      int idx = (row_id * wx_tensor->dims()[1]) + col_id;
      auto abs_value = std::abs(wx_tensor->data<float>()[idx]);
      const int out_id = col_id % OC;
      if (row_id == 0 || abs_value > scale_o[out_id]) {
        scale_o[out_id] = abs_value;
      }
    }
  }

  // Remaining elements of WeightH, folded into scale_o.
  for (int i = 2 * OC * OC; i < OC * wh_tensor->dims()[1]; i++) {
    int col_id = i % OC;
    auto abs_value = std::abs(wh_tensor->data<float>()[i]);
    if (abs_value > scale_o[col_id]) scale_o[col_id] = abs_value;
  }

  // Concatenate both groups and turn the maxima into scales.
  scale_ur.insert(scale_ur.end(), scale_o.begin(), scale_o.end());
  std::transform(scale_ur.begin(), scale_ur.end(), scale_ur.begin(),
                 [](float c) { return 1 / c; });
  GetTensorFromVector(scale_ur, tensor);
}
// Computes weight scales for every "fusion_gru" and "multi_gru" op in `graph`.
//
// The variables listed in input slots `wx_name` and `wh_name` are paired by
// position (both lists must have the same length). For each pair
// ComputeSingleGruWeightScales() produces one scale tensor, which is inserted
// into `var_quant_scales` under the WeightX variable name with the bool flag
// set to false; an existing entry is not overwritten.
void ComputePropagateScalesMkldnnPass::ComputeGruWeightScales(
    ir::Graph* graph, Scope* scope, const std::string& wx_name,
    const std::string& wh_name, StringPairMap* var_quant_scales) const {
  for (auto* op_node :
       ir::TopologyVarientSort(*graph, static_cast<ir::SortKind>(0))) {
    if (!op_node->IsOp()) continue;

    auto* op_desc = op_node->Op();
    const bool is_gru_op =
        op_desc->Type() == "fusion_gru" || op_desc->Type() == "multi_gru";
    if (!is_gru_op) continue;

    auto wx_var_names = op_desc->Input(wx_name);
    auto wh_var_names = op_desc->Input(wh_name);
    const int wx_names_size = static_cast<int>(wx_var_names.size());
    const int wh_names_size = static_cast<int>(wh_var_names.size());
    PADDLE_ENFORCE_EQ(
        wx_names_size, wh_names_size,
        platform::errors::Fatal("Mismatch in number of weights inputs (%d "
                                "for WeightX vs. %d for WeightH).",
                                wx_names_size, wh_names_size));

    for (int layer = 0; layer < wx_names_size; layer++) {
      const auto& wh_var_name = wh_var_names[layer];
      const auto& wx_var_name = wx_var_names[layer];

      Tensor scale_tensor;
      ComputeSingleGruWeightScales(scope, wx_var_name, wh_var_name,
                                   &scale_tensor);
      var_quant_scales->insert(
          std::make_pair(wx_var_name, std::make_pair(false, scale_tensor)));
    }
  }
}
// Computes the combined WeightX/WeightH scales of a single LSTM layer.
//
// Both variables are looked up in `scope` and must exist. One value is
// produced per WeightX column: the maximum of |w| over that column of WeightX
// (row-major) and over the same column index of WeightH. Each maximum is then
// replaced by its reciprocal and the result is written to `tensor`.
void ComputePropagateScalesMkldnnPass::ComputeSingleLstmWeightScales(
    Scope* scope, const std::string& wx_var_name,
    const std::string& wh_var_name, Tensor* tensor) const {
  auto* wx_var = scope->FindVar(wx_var_name);
  PADDLE_ENFORCE_NOT_NULL(
      wx_var, platform::errors::NotFound(
                  "The input persistable var [%s] is not found.", wx_var_name));
  auto* wh_var = scope->FindVar(wh_var_name);
  PADDLE_ENFORCE_NOT_NULL(
      wh_var, platform::errors::NotFound(
                  "The input persistable var [%s] is not found.", wh_var_name));

  const auto* wx_tensor = wx_var->GetMutable<LoDTensor>();
  const auto* wh_tensor = wh_var->GetMutable<LoDTensor>();

  std::vector<float> scale(wx_tensor->dims()[1]);

  // WeightX pass: the first row seeds each column maximum, later rows raise
  // it.
  for (int r = 0; r < wx_tensor->dims()[0]; r++) {
    for (int c = 0; c < wx_tensor->dims()[1]; c++) {
      int flat = (r * wx_tensor->dims()[1]) + c;
      auto magnitude = std::abs(wx_tensor->data<float>()[flat]);
      if (r == 0 || magnitude > scale[c]) {
        scale[c] = magnitude;
      }
    }
  }

  // WeightH pass: only ever raises the maxima found above.
  for (int r = 0; r < wh_tensor->dims()[0]; r++) {
    for (int c = 0; c < wh_tensor->dims()[1]; c++) {
      int flat = (r * wh_tensor->dims()[1]) + c;
      auto magnitude = std::abs(wh_tensor->data<float>()[flat]);
      if (magnitude > scale[c]) {
        scale[c] = magnitude;
      }
    }
  }

  // Turn every maximum into its reciprocal.
  for (auto& value : scale) {
    value = 1 / value;
  }
  GetTensorFromVector(scale, tensor);
}
// Computes weight scales for every "fusion_lstm" op in `graph`.
//
// The variables listed in input slots `wx_name` and `wh_name` are paired by
// position (both lists must have the same length). For each pair
// ComputeSingleLstmWeightScales() produces one scale tensor, which is
// inserted into `var_quant_scales` under the WeightX variable name with the
// bool flag set to false; an existing entry is not overwritten.
void ComputePropagateScalesMkldnnPass::ComputeLstmWeightScales(
    ir::Graph* graph, Scope* scope, const std::string& wx_name,
    const std::string& wh_name, StringPairMap* var_quant_scales) const {
  for (auto* op_node :
       ir::TopologyVarientSort(*graph, static_cast<ir::SortKind>(0))) {
    if (!op_node->IsOp()) continue;

    auto* op_desc = op_node->Op();
    if (!(op_desc->Type() == "fusion_lstm")) continue;

    auto wx_var_names = op_desc->Input(wx_name);
    auto wh_var_names = op_desc->Input(wh_name);
    const int wx_names_size = static_cast<int>(wx_var_names.size());
    const int wh_names_size = static_cast<int>(wh_var_names.size());
    PADDLE_ENFORCE_EQ(
        wx_names_size, wh_names_size,
        platform::errors::Fatal("Mismatch in number of weights inputs (%d "
                                "for WeightX vs. %d for WeightH).",
                                wx_names_size, wh_names_size));

    for (int layer = 0; layer < wx_names_size; layer++) {
      const auto& wh_var_name = wh_var_names[layer];
      const auto& wx_var_name = wx_var_names[layer];

      Tensor scale_tensor;
      ComputeSingleLstmWeightScales(scope, wx_var_name, wh_var_name,
                                    &scale_tensor);
      var_quant_scales->insert(
          std::make_pair(wx_var_name, std::make_pair(false, scale_tensor)));
    }
  }
}
// Computes the weight scales of all supported op types and stores them in
// `var_quant_scales`.
//
// Order of the calls matters because every helper uses insert(), which keeps
// the first entry stored for a variable name:
//   1. conv2d / depthwise_conv2d "Filter" (axis 1)
//   2. fc "W"                             (axis 0)
//   3. fusion_gru / multi_gru "WeightH"   (axis 0)
//   4. fusion_lstm "WeightH"              (axis 0)
//   5. GRU   combined WeightX/WeightH scales (stored under WeightX)
//   6. LSTM  combined WeightX/WeightH scales (stored under WeightX)
void ComputePropagateScalesMkldnnPass::ComputeWeightScales(
    ir::Graph* graph, Scope* scope, StringPairMap* var_quant_scales) const {
  const std::unordered_set<std::string> conv_ops{"conv2d", "depthwise_conv2d"};
  const std::unordered_set<std::string> fc_ops{"fc"};
  const std::unordered_set<std::string> gru_ops{"fusion_gru", "multi_gru"};
  const std::unordered_set<std::string> lstm_ops{"fusion_lstm"};

  ComputeVarScales(graph, scope, conv_ops, "Filter", 1, var_quant_scales);
  ComputeVarScales(graph, scope, fc_ops, "W", 0, var_quant_scales);
  ComputeVarScales(graph, scope, gru_ops, "WeightH", 0, var_quant_scales);
  ComputeVarScales(graph, scope, lstm_ops, "WeightH", 0, var_quant_scales);

  ComputeGruWeightScales(graph, scope, "WeightX", "WeightH", var_quant_scales);
  ComputeLstmWeightScales(graph, scope, "WeightX", "WeightH",
                          var_quant_scales);
}
// Derives the scale of a "scale" op's input from the scale of its output.
//
// op_node:          the "scale" op; its float attribute "scale" is read.
// input_name:       variable name that receives the derived scale.
// output_name:      variable name whose scale is used as the source.
// var_quant_scales: scale map that is read and extended.
//
// Nothing happens when `output_name` has no entry. Otherwise a new tensor of
// the same shape is filled with (output scale * op "scale" attribute) and
// inserted under `input_name`, carrying over the bool flag of the output
// entry. An existing entry for `input_name` is not overwritten.
//
// Fix: the loop used to compute `data[i] = data[i] * scale` on the freshly
// allocated destination buffer, i.e. it multiplied uninitialized memory and
// never read the output scale. It now reads from the source tensor.
void ComputePropagateScalesMkldnnPass::UpdateScaleOpInScale(
    Node* op_node, const std::string& input_name,
    const std::string& output_name, StringPairMap* var_quant_scales) const {
  auto iter = var_quant_scales->find(output_name);
  if (iter == var_quant_scales->end()) {
    return;
  }

  // Copy the entry so that nothing below depends on the map staying unchanged.
  auto pair = iter->second;
  const auto tensor = pair.second;

  const auto scale = BOOST_GET_CONST(float, op_node->Op()->GetAttr("scale"));

  Tensor tmp_tensor;
  tmp_tensor.Resize(tensor.dims());
  auto* dst_data = tmp_tensor.mutable_data<float>(platform::CPUPlace());
  const auto* src_data = tensor.data<float>();
  for (int i = 0; i < tensor.numel(); i++) {
    dst_data[i] = src_data[i] * scale;
  }

  auto new_pair = std::make_pair(pair.first, tmp_tensor);
  var_quant_scales->insert(std::make_pair(input_name, new_pair));
}
// Runs one propagation sweep over the graph.
//
// For every op whose name is in `scale_immutable_ops` the input variable
// ("Input"[0] for "slice", "X"[0] otherwise) and the output variable
// ("Out"[0]) are made to share one scale:
//   * neither has a scale -> both names are added to the returned set;
//   * the input has one   -> it is copied to the output;
//   * only the output has -> it is copied to the input.
// For "scale" ops whose output has a scale, UpdateScaleOpInScale() derives
// the input scale.
//
// Returns the names of all variables that are still waiting for a scale.
//
// Fix: the copies used to be written through `out_iter->second` /
// `in_iter->second` even when that iterator was end() (always the case for
// `in_iter` in the last branch), which is undefined behaviour. The target
// entry is now created or overwritten through operator[].
std::unordered_set<std::string> ComputePropagateScalesMkldnnPass::UpdateScales(
    ir::Graph* graph, StringPairMap* var_quant_scales,
    const std::unordered_set<std::string>& scale_immutable_ops) const {
  std::unordered_set<std::string> waiting_for_scale{};
  for (auto* op_node :
       ir::TopologyVarientSort(*graph, static_cast<ir::SortKind>(0))) {
    if (!op_node->IsOp()) continue;

    const auto op_name = op_node->Name();
    if (scale_immutable_ops.count(op_name)) {
      std::string input_name;
      if (op_name == "slice") {
        input_name = op_node->Op()->Input("Input")[0];
      } else {
        input_name = op_node->Op()->Input("X")[0];
      }
      const std::string output_name = op_node->Op()->Output("Out")[0];

      auto in_iter = var_quant_scales->find(input_name);
      auto out_iter = var_quant_scales->find(output_name);
      const bool has_in = in_iter != var_quant_scales->end();
      const bool has_out = out_iter != var_quant_scales->end();

      if (!has_in && !has_out) {
        waiting_for_scale.insert(input_name);
        waiting_for_scale.insert(output_name);
      } else if (has_in) {
        // Copy first: operator[] may insert and invalidate the iterators.
        const auto in_scale = in_iter->second;
        (*var_quant_scales)[output_name] = in_scale;
      } else {
        // Only the output has a scale; propagate it backwards.
        const auto out_scale = out_iter->second;
        (*var_quant_scales)[input_name] = out_scale;
      }
    } else if (op_name == "scale") {
      const std::string output_name = op_node->Op()->Output("Out")[0];
      auto out_iter = var_quant_scales->find(output_name);
      if (out_iter != var_quant_scales->end()) {
        const std::string input_name = op_node->Op()->Input("X")[0];
        UpdateScaleOpInScale(op_node, input_name, output_name,
                             var_quant_scales);
      }
    }
  }
  return waiting_for_scale;
}
// Repeats UpdateScales() until the propagation has converged.
//
// The loop stops as soon as no variable is waiting for a scale any more, or
// when a sweep returns exactly the same waiting set as the sweep before it
// (no further progress is possible).
void ComputePropagateScalesMkldnnPass::PropagateScales(
    ir::Graph* graph, StringPairMap* var_quant_scales,
    const std::unordered_set<std::string>& scale_immutable_ops) const {
  std::unordered_set<std::string> pending =
      UpdateScales(graph, var_quant_scales, scale_immutable_ops);
  std::unordered_set<std::string> previous_pending{};

  while (!pending.empty() && pending != previous_pending) {
    previous_pending = pending;
    pending = UpdateScales(graph, var_quant_scales, scale_immutable_ops);
  }
}
void
ComputePropagateScalesMkldnnPass
::
ConvertStringPairMap
(
const
StringPairMap
&
var_quant_scales
,
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
float
>>*
info_map
)
const
{
for
(
auto
iter
=
var_quant_scales
.
begin
();
iter
!=
var_quant_scales
.
end
();
iter
++
)
{
auto
*
data
=
iter
->
second
.
second
.
data
<
float
>
();
std
::
vector
<
float
>
data_v
;
for
(
int
i
=
0
;
i
<
iter
->
second
.
second
.
numel
();
i
++
)
{
data_v
.
push_back
(
data
[
i
]);
}
info_map
->
insert
(
std
::
make_pair
(
iter
->
first
,
data_v
));
}
}
// Entry point of the pass.
//
// Steps:
//   1. read the scales already stored on the graph (GetQuantInfo);
//   2. compute the weight scales from the parameters in param_scope();
//   3. propagate scales across the ops listed in `scale_immutable_ops` and
//      across "scale" ops;
//   4. store the resulting map back on the graph with SaveInfoInTheFirstOp()
//      so that cpu_quantize_pass can use it.
void ComputePropagateScalesMkldnnPass::ApplyImpl(ir::Graph* graph) const {
  VLOG(3) << "Convert paddle model to mkldnn quantized model.";
  const std::string pattern_name = "compute_propagate_scales_mkldnn_pass";
  FusePassBase::Init(pattern_name, graph);

  // Ops handled by UpdateScales(): input and output share one scale.
  const std::unordered_set<std::string> scale_immutable_ops = {
      "transpose2", "reshape2",       "pool2d",
      "slice",      "nearest_interp", "nearest_interp_v2"};

  StringPairMap var_quant_scales{};
  auto* scope = param_scope();

  GetQuantInfo(graph, &var_quant_scales);
  ComputeWeightScales(graph, scope, &var_quant_scales);
  PropagateScales(graph, &var_quant_scales, scale_immutable_ops);

  // save var_quant_scales in the first op's attr
  // for cpu_quantize_pass
  std::unordered_map<std::string, std::vector<float>> info_map;
  ConvertStringPairMap(var_quant_scales, &info_map);
  SaveInfoInTheFirstOp(graph, "has_quant_info", "var_quant_scales", info_map);
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
// Registers the pass under the name "compute_propagate_scales_mkldnn_pass".
REGISTER_PASS(compute_propagate_scales_mkldnn_pass,
              paddle::framework::ir::ComputePropagateScalesMkldnnPass);

// Declares the operator versions this pass is registered as compatible with.
REGISTER_PASS_CAPABILITY(compute_propagate_scales_mkldnn_pass)
    .AddCombination(
        paddle::framework::compatible::OpVersionComparatorCombination()
            .LE("conv2d", 1)
            .EQ("fc", 0)
            .LE("conv2d_transpose", 2)
            .EQ("fake_quantize_abs_max", 0)
            .EQ("fake_quantize_range_abs_max", 0)
            .EQ("fake_quantize_moving_average_abs_max", 0)
            .LE("fake_channel_wise_quantize_abs_max", 1)
            .EQ("fake_dequantize_max_abs", 0));
paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h
0 → 100644
浏览文件 @
c00d869b
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// Maps a variable name to its quantization info: a bool flag and the tensor
// holding the scale values.
// NOTE(review): the pass implementation always stores `false` in the flag;
// its meaning is not visible here - presumably it marks unsigned data, confirm
// against the consumer (cpu_quantize_pass).
using StringPairMap =
    std::unordered_map<std::string, std::pair<bool, Tensor>>;
// Graph pass that collects quantization scales for an MKL-DNN int8 model:
// it reads the scales already stored on the graph, computes weight scales,
// propagates scales between variables and stores the result back on the
// graph.
class ComputePropagateScalesMkldnnPass : public FusePassBase {
 public:
  ComputePropagateScalesMkldnnPass() = default;
  virtual ~ComputePropagateScalesMkldnnPass() = default;

#ifdef PADDLE_WITH_TESTING
  friend class ComputePropagateScalesMkldnnPassTest;
#endif

 protected:
  // Runs the whole pass on `graph`.
  void ApplyImpl(ir::Graph* graph) const override;

 private:
  // Copies `data_v` into `tensor` as a 1-D float tensor.
  void GetTensorFromVector(const std::vector<float>& data_v,
                           Tensor* tensor) const;

  // Loads the scales stored on the graph into `var_quant_scales`.
  void GetQuantInfo(ir::Graph* graph, StringPairMap* var_quant_scales) const;

  // Returns per-channel scales (1 / max|x|) of a rank-2 tensor along `axis`.
  std::vector<float> GetScales(Tensor* tensor, int axis) const;

  // Computes scales of input `weight_name` for all ops whose type is in
  // `ops`.
  void ComputeVarScales(ir::Graph* graph, Scope* scope,
                        const std::unordered_set<std::string>& ops,
                        const std::string& weight_name, const int axis,
                        StringPairMap* var_quant_scales) const;

  // Computes the combined WeightX/WeightH scales of one GRU layer.
  void ComputeSingleGruWeightScales(Scope* scope,
                                    const std::string& wx_var_name,
                                    const std::string& wh_var_name,
                                    Tensor* tensor) const;

  // Computes combined weight scales for all fusion_gru / multi_gru ops.
  void ComputeGruWeightScales(ir::Graph* graph, Scope* scope,
                              const std::string& wx_name,
                              const std::string& wh_name,
                              StringPairMap* var_quant_scales) const;

  // Computes the combined WeightX/WeightH scales of one LSTM layer.
  void ComputeSingleLstmWeightScales(Scope* scope,
                                     const std::string& wx_var_name,
                                     const std::string& wh_var_name,
                                     Tensor* tensor) const;

  // Computes combined weight scales for all fusion_lstm ops.
  void ComputeLstmWeightScales(ir::Graph* graph, Scope* scope,
                               const std::string& wx_name,
                               const std::string& wh_name,
                               StringPairMap* var_quant_scales) const;

  // Computes the weight scales of every supported op type.
  void ComputeWeightScales(ir::Graph* graph, Scope* scope,
                           StringPairMap* var_quant_scales) const;

  // Derives the input scale of a "scale" op from its output scale.
  void UpdateScaleOpInScale(Node* op_node, const std::string& input_name,
                            const std::string& output_name,
                            StringPairMap* var_quant_scales) const;

  // Runs one propagation sweep; returns the variables still lacking a scale.
  std::unordered_set<std::string> UpdateScales(
      ir::Graph* graph, StringPairMap* var_quant_scales,
      const std::unordered_set<std::string>& scale_immutable_ops) const;

  // Repeats UpdateScales() until no further progress is made.
  void PropagateScales(
      ir::Graph* graph, StringPairMap* var_quant_scales,
      const std::unordered_set<std::string>& scale_immutable_ops) const;

  // Converts the scale tensors of `var_quant_scales` into float vectors.
  void ConvertStringPairMap(
      const StringPairMap& var_quant_scales,
      std::unordered_map<std::string, std::vector<float>>* info_map) const;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录