Commit c71025eb (unverified)
Authored on Aug 26, 2021 by XGZhang
Committed by GitHub on Aug 26, 2021
fix the bug of channel-wise quantization for ernie (#34948)
Parent: 0efda9d9
Showing 6 changed files with 92 additions and 33 deletions (+92 −33)
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc                +4  −0
paddle/fluid/operators/compat/fake_channel_wise_dequantize_max_abs.pbtxt   +4  −0
paddle/fluid/operators/fake_dequantize_op.cc                               +61 −20
paddle/fluid/operators/fake_dequantize_op.cu                               +10 −7
paddle/fluid/operators/fake_dequantize_op.h                                +7  −5
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py         +6  −1
paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc
@@ -115,6 +115,10 @@ QuantDequantFusePass::QuantDequantFusePass() {
       .AddAttr("quant_axis")
       .IsIntIn({0, 1})
       .IsOptional()
-      .End();
+      .End()
+      .AddAttr("x_num_col_dims")
+      .IsType<int>()
+      .IsOptional()
+      .End();
   AddOpCompat(OpCompat("conv2d"))
       .AddInput("Input")
paddle/fluid/operators/compat/fake_channel_wise_dequantize_max_abs.pbtxt
@@ -17,4 +17,8 @@ def {
     name: "quant_axis"
     type: INT
   }
+  attrs {
+    name: "x_num_col_dims"
+    type: INT
+  }
 }
paddle/fluid/operators/fake_dequantize_op.cc
@@ -39,7 +39,7 @@ struct ChannelDequantizeFunctor<platform::CPUDeviceContext, T> {
   void operator()(const platform::CPUDeviceContext& dev_ctx,
                   const framework::Tensor* in, const framework::Tensor** scales,
                   const int scale_num, T max_range, const int quant_axis,
-                  framework::Tensor* out) {
+                  const int x_num_col_dims, framework::Tensor* out) {
     if (scale_num == 1) {
       // Dequant op is before quantized op
       // Dequantize the weight of quantized op
@@ -81,23 +81,50 @@ struct ChannelDequantizeFunctor<platform::CPUDeviceContext, T> {
     } else if (scale_num == 2) {
       // Dequant op is after quantized op
       // Dequantize the output tensor of quantized op
-      int batch_size = in->dims()[0];
-      int channel = in->dims()[1];
-      const T* scale_one = scales[0]->data<T>();
-      const T* scale_two = scales[1]->data<T>();
-      for (int i = 0; i < batch_size; i++) {
-        framework::Tensor one_batch_in = in->Slice(i, i + 1).Resize(
-            framework::slice_ddim(in->dims(), 1, in->dims().size()));
-        framework::Tensor one_batch_out = out->Slice(i, i + 1).Resize(
-            framework::slice_ddim(out->dims(), 1, out->dims().size()));
-        for (int j = 0; j < channel; j++) {
-          T s = scale_one[j];
-          framework::Tensor one_channel_in = one_batch_in.Slice(j, j + 1);
-          framework::Tensor one_channel_out = one_batch_out.Slice(j, j + 1);
-          auto in_e = framework::EigenVector<T>::Flatten(one_channel_in);
-          auto out_e = framework::EigenVector<T>::Flatten(one_channel_out);
-          auto& dev = *dev_ctx.eigen_device();
-          out_e.device(dev) = in_e * s * scale_two[0] / max_range;
+      if (x_num_col_dims > 1) {
+        auto in_dims = in->dims();
+        const int64_t channel = in_dims[x_num_col_dims];
+        const T* scale_one = scales[0]->data<T>();
+        const T* scale_two = scales[1]->data<T>();
+        int64_t out_iter = 1;
+        for (int i = 0; i < x_num_col_dims; i++) {
+          out_iter *= in_dims[i];
+        }
+        int64_t step_i = in->numel() / out_iter;
+        int64_t step_j = in->numel() / (out_iter * channel);
+        auto* in_data = in->data<T>();
+        auto* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
+        for (int64_t i = 0; i < out_iter; i++) {
+          for (int64_t j = 0; j < channel; j++) {
+            auto* cur_in = in_data + i * step_i + j * step_j;
+            auto* cur_out = out_data + i * step_i + j * step_j;
+            T s = scale_one[j];
+            for (int64_t k = 0; k < step_j; k++) {
+              *cur_out = (*cur_in) * s * scale_two[0] / max_range;
+              ++cur_in;
+              ++cur_out;
+            }
+          }
+        }
+      } else {
+        int batch_size = in->dims()[0];
+        int channel = in->dims()[1];
+        const T* scale_one = scales[0]->data<T>();
+        const T* scale_two = scales[1]->data<T>();
+        for (int i = 0; i < batch_size; i++) {
+          framework::Tensor one_batch_in = in->Slice(i, i + 1).Resize(
+              framework::slice_ddim(in->dims(), 1, in->dims().size()));
+          framework::Tensor one_batch_out = out->Slice(i, i + 1).Resize(
+              framework::slice_ddim(out->dims(), 1, out->dims().size()));
+          for (int j = 0; j < channel; j++) {
+            T s = scale_one[j];
+            framework::Tensor one_channel_in = one_batch_in.Slice(j, j + 1);
+            framework::Tensor one_channel_out = one_batch_out.Slice(j, j + 1);
+            auto in_e = framework::EigenVector<T>::Flatten(one_channel_in);
+            auto out_e = framework::EigenVector<T>::Flatten(one_channel_out);
+            auto& dev = *dev_ctx.eigen_device();
+            out_e.device(dev) = in_e * s * scale_two[0] / max_range;
+          }
         }
       }
     }
   }
@@ -199,7 +226,16 @@ class FakeChannelWiseDequantizeMaxAbsOpMaker
                                 "the received is %d",
                                 quant_axis));
         });
+    AddAttr<int>("x_num_col_dims",
+                 "The x_num_col_dims of mul. Only used for mul or matmul.")
+        .SetDefault(1)
+        .AddCustomChecker([](const int& x_num_col_dims) {
+          PADDLE_ENFORCE_EQ(x_num_col_dims == 0, false,
+                            platform::errors::InvalidArgument(
+                                "'x_num_col_dims' should be larger than 0, but "
+                                "the received is %d",
+                                x_num_col_dims));
+        });
     AddComment(R"DOC(
 FakeChannelWiseDequantizeMaxAbsOp operator.
@@ -245,4 +281,9 @@ REGISTER_OP_VERSION(fake_channel_wise_dequantize_max_abs)
         R"ROC(add new attributes [quant_axis] for applying per-channel "
         "dequantization to conv2d_tranpose and mul ops.)ROC",
         paddle::framework::compatible::OpVersionDesc().NewAttr(
-            "quant_axis", "The axis for dequantization.", 0));
+            "quant_axis", "The axis for dequantization.", 0))
+    .AddCheckpoint(
+        R"ROC(add new attributes [x_num_col_dims] for applying per-channel "
+        "dequantization to mul ops.)ROC",
+        paddle::framework::compatible::OpVersionDesc().NewAttr(
+            "x_num_col_dims", "The x_num_col_dims for dequantization.", 1));
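Note on the new CPU branch above: when x_num_col_dims > 1 the functor views the input as (out_iter, channel, step_j) and multiplies every channel slice by its own weight scale and the shared activation scale. The NumPy sketch below (hypothetical shapes and names, not part of the commit) reproduces the same indexing; the pointer arithmetic i * step_i + j * step_j in the C++ loop addresses exactly the (i, j, :) slice of this view.

import numpy as np

# Hypothetical example: the output of a quantized "mul" with x_num_col_dims = 2,
# so the per-channel weight scales run along dims()[x_num_col_dims].
x_num_col_dims = 2
in_data = np.random.randint(-127, 128, size=(4, 8, 16, 32)).astype(np.float32)
scale_one = np.random.rand(16).astype(np.float32)   # one scale per channel
scale_two = np.random.rand(1).astype(np.float32)    # single activation scale
max_range = (2 ** 7 - 1) * (2 ** 7 - 1)             # assuming 8-bit weights and activations

# Same index arithmetic as the new branch: out_iter flattens the leading dims,
# channel is dims()[x_num_col_dims], step_j is the contiguous run per channel.
out_iter = int(np.prod(in_data.shape[:x_num_col_dims]))   # 4 * 8
channel = in_data.shape[x_num_col_dims]                    # 16
step_j = in_data.size // (out_iter * channel)              # 32

view = in_data.reshape(out_iter, channel, step_j)
out = view * scale_one[None, :, None] * scale_two[0] / max_range
out = out.reshape(in_data.shape)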
paddle/fluid/operators/fake_dequantize_op.cu
@@ -77,9 +77,9 @@ __global__ void DequantizeOneScaleQuantAxis1(const T* in, const T* scale,
 template <typename T>
 __global__ void DequantizeTwoScale(const T* in, const T* scale_one,
                                    const T* scale_two, T max_range, int num,
-                                   int batch_size, int channel, T* out) {
+                                   int iter_size, int channel, T* out) {
   int tid = threadIdx.x;
-  int channel_size = num / (batch_size * channel);
+  int channel_size = num / (iter_size * channel);
   int scale_index = blockIdx.x % channel;
   const T* in_c = in + blockIdx.x * channel_size;
   T* out_c = out + blockIdx.x * channel_size;
@@ -93,7 +93,7 @@ struct ChannelDequantizeFunctor<platform::CUDADeviceContext, T> {
   void operator()(const platform::CUDADeviceContext& dev_ctx,
                   const framework::Tensor* in, const framework::Tensor** scales,
                   const int scale_num, T max_range, const int quant_axis,
-                  framework::Tensor* out) {
+                  const int x_num_col_dims, framework::Tensor* out) {
     auto in_dims = in->dims();
     const T* in_data = in->data<T>();
     T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
@@ -116,14 +116,17 @@ struct ChannelDequantizeFunctor<platform::CUDADeviceContext, T> {
     } else if (scale_num == 2) {
       // Not need to consider quant_axis
       int num = in->numel();
-      int batch_size = in->dims()[0];
-      int channel = in->dims()[1];
+      int iter_size = 1;
+      for (int i = 0; i < x_num_col_dims; i++) {
+        iter_size *= in->dims()[i];
+      }
+      int channel = in->dims()[x_num_col_dims];
       const T* scale_one = scales[0]->data<T>();
       const T* scale_two = scales[1]->data<T>();
       int block = 1024;
-      int grid = batch_size * channel;
+      int grid = iter_size * channel;
       DequantizeTwoScale<T><<<grid, block, 0, dev_ctx.stream()>>>(
-          in_data, scale_one, scale_two, max_range, num, batch_size, channel,
+          in_data, scale_one, scale_two, max_range, num, iter_size, channel,
           out_data);
     }
   }
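A quick sanity check on the new launch shape (a hypothetical sketch, not from the commit): grid = iter_size * channel blocks, each handling channel_size = num / (iter_size * channel) contiguous elements and reading scale_one[blockIdx.x % channel], tile the tensor exactly.

import numpy as np

shape = (4, 8, 16, 32)      # hypothetical shape of the quantized op's output
x_num_col_dims = 2
num = int(np.prod(shape))
iter_size = int(np.prod(shape[:x_num_col_dims]))
channel = shape[x_num_col_dims]
channel_size = num // (iter_size * channel)
grid = iter_size * channel

# The blocks cover every element of the tensor exactly once.
assert grid * channel_size == num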
paddle/fluid/operators/fake_dequantize_op.h
@@ -33,7 +33,8 @@ template <typename DeviceContext, typename T>
 struct ChannelDequantizeFunctor {
   void operator()(const DeviceContext& dev_ctx, const framework::Tensor* in,
                   const framework::Tensor** scales, const int scale_num,
-                  T max_range, const int quant_axis, framework::Tensor* out);
+                  T max_range, const int quant_axis, const int x_num_col_dims,
+                  framework::Tensor* out);
 };

 template <typename DeviceContext, typename T>
@@ -64,6 +65,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
     auto quant_bits = ctx.Attr<std::vector<int>>("quant_bits");
     auto quant_axis = ctx.Attr<int>("quant_axis");
+    auto x_num_col_dims = ctx.Attr<int>("x_num_col_dims");
     int max_range = 1;

     auto& dev_ctx = ctx.template device_context<DeviceContext>();
@@ -80,11 +82,11 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
       max_range *= (std::pow(2, quant_bits[0] - 1) - 1);
     } else if (scale_num == 2) {
       PADDLE_ENFORCE_EQ(
-          scales[0]->numel(), in->dims()[1],
+          scales[0]->numel(), in->dims()[x_num_col_dims],
           platform::errors::PreconditionNotMet(
               "The number of first scale values must be the same with "
-              "second dimension value of Input(X) when the `Scales` has two "
-              "elements, but %ld != %ld here.",
+              "corresponding dimension value of Input(X) when the `Scales` "
+              "has two elements, but %ld != %ld here.",
               scales[0]->numel(), in->dims()[1]));
       PADDLE_ENFORCE_EQ(scales[1]->numel(), 1,
                         platform::errors::PreconditionNotMet(
@@ -96,7 +98,7 @@ class FakeChannelWiseDequantizeMaxAbsKernel : public framework::OpKernel<T> {
     }
     ChannelDequantizeFunctor<DeviceContext, T>()(
         dev_ctx, in, scales.data(), scale_num, static_cast<T>(max_range),
-        quant_axis, out);
+        quant_axis, x_num_col_dims, out);
   }
 };
python/paddle/fluid/contrib/slim/quantization/quantization_pass.py
@@ -1273,12 +1273,17 @@ class QuantizationFreezePass(object):
                 var_type=output_var_node.type(),
                 shape=output_var_node.shape(),
                 var_dtype=output_var_node.dtype())
+            if op_node.op().has_attr("x_num_col_dims"):
+                x_num_col_dims = op_node.op().attr("x_num_col_dims")
+            else:
+                x_num_col_dims = 1
             dequant_op_node = graph.create_op_node(
                 op_type='fake_channel_wise_dequantize_max_abs',
                 attrs={
                     'quant_bits': [self._weight_bits, self._activation_bits],
                     'quant_axis': quant_axis,
-                    'op_role': core.op_proto_and_checker_maker.OpRole.Forward
+                    'op_role': core.op_proto_and_checker_maker.OpRole.Forward,
+                    'x_num_col_dims': x_num_col_dims
                 },
                 inputs={
                     'X': output_var_node,
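For context, a small hypothetical example (not part of the commit) of why the freeze pass forwards x_num_col_dims: for an ERNIE-style mul with x_num_col_dims = 2, the op output keeps the first two input dims, so the per-channel weight scales line up with dims()[x_num_col_dims] of the dequantize input, which is the axis the updated functor and the updated check in fake_dequantize_op.h now use.

import numpy as np

# Hypothetical ERNIE-style mul: X is [batch, seq, hidden], W is [hidden, out],
# and the op carries x_num_col_dims = 2.
batch, seq, hidden, out_dim = 2, 128, 768, 3072
x_num_col_dims = 2
y_shape = (batch, seq, out_dim)    # shape of the mul output fed to the dequantize op
w_scales = np.ones(out_dim)        # per-channel weight scales (Scales[0])

# The number of per-channel scales must equal the dimension selected by
# x_num_col_dims, not dims()[1] as before.
assert w_scales.size == y_shape[x_num_col_dims]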