Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
18dd1294
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
18dd1294
编写于
8月 24, 2018
作者:
D
Dang Qingqing
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine code.
上级
4a4567fc
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
171 additions
and
0 deletions
+171
-0
paddle/fluid/operators/fake_quantize_op.cc
paddle/fluid/operators/fake_quantize_op.cc
+46
-0
paddle/fluid/operators/fake_quantize_op.h
paddle/fluid/operators/fake_quantize_op.h
+125
-0
未找到文件。
paddle/fluid/operators/fake_quantize_op.cc
浏览文件 @
18dd1294
...
...
@@ -18,6 +18,52 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
// Operator definition for the abs-max fake-quantize op.
//
// The class only forwards its constructor arguments to OperatorWithKernel and
// implements shape inference:
//   * "Out" takes the dimensions (and LoD) of input "X";
//   * "OutScale" is a single-element tensor.
class FakeQuantizeAbsMaxOp : public framework::OperatorWithKernel {
 public:
  FakeQuantizeAbsMaxOp(const std::string &type,
                       const framework::VariableNameMap &inputs,
                       const framework::VariableNameMap &outputs,
                       const framework::AttributeMap &attrs)
      : OperatorWithKernel(type, inputs, outputs, attrs) {}

  // Validates that the required variables are present and sets the output
  // shapes. The messages name this operator and the exact variable checked so
  // that a failure is not mistaken for one raised by FakeQuantizeOp, which is
  // a separate operator declared in the same file.
  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of FakeQuantizeAbsMaxOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of FakeQuantizeAbsMaxOp should not be null.");
    PADDLE_ENFORCE(
        ctx->HasOutput("OutScale"),
        "Output(OutScale) of FakeQuantizeAbsMaxOp should not be null.");

    ctx->SetOutputDim("Out", ctx->GetInputDim("X"));
    ctx->SetOutputDim("OutScale", {1});
    ctx->ShareLoD("X", /*->*/ "Out");
  }
};
// Proto/checker maker for the abs-max fake-quantize op: declares one input
// ("X"), two outputs ("Out", "OutScale") and the integer attribute
// "bit_length" (default 8, validated to lie in [1, 16]).
class FakeQuantizeAbsMaxOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Validator attached to "bit_length" below; it enforces 1 <= value <= 16.
    const auto check_bit_length = [](const int &bit_length) {
      PADDLE_ENFORCE(bit_length >= 1 && bit_length <= 16,
                     "'bit_length' should be between 1 and 16.");
    };

    AddInput("X", "(Tensor) Input is float data type.");
    AddOutput("Out",
              "(Tensor) Output of quantized low level tensor, "
              "but also saved as float data type.");
    AddOutput("OutScale", "(Tensor) Current scale");

    auto &bit_length_attr = AddAttr<int>("bit_length", "(int, default 8)");
    bit_length_attr.SetDefault(8).AddCustomChecker(check_bit_length);

    AddComment(R"DOC(
FakeQuantize operator
$$scale = max(abs(X))$$
$$range = 2^{bit_length - 1} - 1$$
$$Out = round(X/scale * range)$$
)DOC");
  }
};
class
FakeQuantizeOp
:
public
framework
::
OperatorWithKernel
{
public:
FakeQuantizeOp
(
const
std
::
string
&
type
,
...
...
paddle/fluid/operators/fake_quantize_op.h
浏览文件 @
18dd1294
...
...
@@ -24,6 +24,131 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
// Kernel that fake-quantizes input "X" into output "Out".
//
// Compute() picks a scale according to the "quantize_type" attribute
// ("abs_max", "range_abs_max" or "moving_average_abs_max"), clips X to
// [-scale, scale] and stores round(bin_cnt / scale * clipped) in "Out", where
// bin_cnt = 2^(bit_length - 1) - 1. The three Find* helpers operate on
// host-side (CPUPlace) pointers.
template <typename DeviceContext, typename T>
class FakeQuantizeAbsMaxKernel : public framework::OpKernel<T> {
 public:
  // Returns the largest absolute value among the first `n` elements of `in`.
  // The running maximum starts at 1e-8, so the result is never smaller than
  // that (presumably so that callers never obtain a zero scale).
  T FindAbsMax(framework::Tensor* in, int n) const {
    T* p = in->mutable_data<T>(platform::CPUPlace());
    T abs_max = static_cast<T>(0.00000001);
    for (int i = 0; i < n; i++) {
      const T tmp = std::fabs(p[i]);
      if (tmp > abs_max) abs_max = tmp;
    }
    return abs_max;
  }

  // Records `cur_scale` in the scale history `scale_list` and updates the
  // running maximum held in out_scale[0].
  //
  // scale_list   history buffer, used as a ring of `window_size` slots
  //              (assumes the tensor holds at least `window_size` elements —
  //              TODO confirm against the op definition).
  // out_scale    on entry out_scale[0] is the previous maximum; on return it
  //              holds the updated maximum.
  // cur_scale    abs-max of the current batch.
  // window_size  number of history slots.
  // current_iter zero-based index of the current iteration.
  //
  // Returns the updated maximum.
  T FindRangeAbsMax(framework::Tensor* scale_list, framework::Tensor* out_scale,
                    const T& cur_scale, int window_size,
                    int current_iter) const {
    T* sl = scale_list->mutable_data<T>(platform::CPUPlace());

    // Wrap the write position so the history is reused as a ring instead of
    // being indexed past `window_size` once current_iter grows beyond it.
    const int slot =
        (window_size > 0) ? current_iter % window_size : current_iter;
    const T remove_tmp = sl[slot];
    sl[slot] = cur_scale;

    T& max_scale = out_scale->mutable_data<T>(platform::CPUPlace())[0];
    if (max_scale < cur_scale) {
      max_scale = cur_scale;
    } else if (std::fabs(remove_tmp - max_scale) < 1e-6) {
      // The entry that was just overwritten was (approximately) the maximum,
      // so the maximum has to be recomputed from what is still stored. Slots
      // 0..current_iter are populated, i.e. current_iter + 1 entries, capped
      // at the window size.
      const int filled = current_iter + 1;
      const int size = (filled > window_size) ? window_size : filled;
      max_scale = T(FindAbsMax(scale_list, size));
    }
    return max_scale;
  }

  // Blends the current batch scale with in_scale[0], stores the result in
  // out_scale[0] and returns it.
  // NOTE(review): the weights are 0.9 for the current value and 0.1 for the
  // stored value; confirm this is the intended direction of the average.
  // NOTE(review): if in_scale and out_scale refer to the same buffer, the
  // caller has already overwritten in_scale[0] with the batch maximum —
  // confirm whether the two may alias.
  T FindMovingAverageAbsMmax(framework::Tensor* in_scale,
                             framework::Tensor* out_scale,
                             const T& cur_scale) const {
    T* ins = in_scale->mutable_data<T>(platform::CPUPlace());
    T* outs = out_scale->mutable_data<T>(platform::CPUPlace());
    outs[0] = 0.9 * cur_scale + 0.1 * ins[0];
    return T(outs[0]);
  }

  // Reads "X", determines the quantization scale for the configured
  // "quantize_type", and writes the clipped, scaled and rounded values to
  // "Out". Depending on the mode it also updates "OutMovingScale",
  // "OutScales" and "OutCurrentIter".
  void Compute(const framework::ExecutionContext& context) const override {
    auto* tensor = context.Output<framework::Tensor>("Out");
    auto* in = context.Input<framework::Tensor>("X");
    const bool is_test = context.Attr<bool>("is_test");
    tensor->mutable_data<T>(in->place());

    auto* oms_tensor = context.Output<framework::Tensor>("OutMovingScale");
    oms_tensor->mutable_data<T>(in->place());

    const std::string quantize_type =
        context.Attr<std::string>("quantize_type");
    if (quantize_type == "range_abs_max") {
      auto* oss_tensor = context.Output<framework::Tensor>("OutScales");
      oss_tensor->mutable_data<T>(
          context.Input<framework::Tensor>("InScales")->place());
      // The iteration counter is read and written as int further down, so it
      // is allocated as int here as well (it used to be allocated as T).
      auto* oci_tensor = context.Output<framework::Tensor>("OutCurrentIter");
      oci_tensor->mutable_data<int>(
          context.Input<framework::Tensor>("InCurrentIter")->place());
    }

    T scale = static_cast<T>(1);
    int window_size = context.Attr<int>("window_size");
    int bit_length = context.Attr<int>("bit_length");
    int bin_cnt = static_cast<int>(std::pow(2, bit_length - 1) - 1);

    auto& dev =
        *context.template device_context<DeviceContext>().eigen_device();
    auto raw_in = framework::EigenVector<T>::Flatten(*in);

    if (quantize_type == "abs_max") {
      // Scale is the abs-max of the current input; the history buffer and the
      // iteration counter are reset to zero.
      auto* saving_scale = context.Output<framework::Tensor>("OutMovingScale");
      auto scale_out = framework::EigenVector<T>::Flatten(*saving_scale);
      scale_out.device(dev) = raw_in.abs().maximum();
      scale = scale_out(0);

      auto& device_ctx = context.template device_context<DeviceContext>();
      auto* scale_list = context.Output<framework::Tensor>("OutScales");
      math::SetConstant<DeviceContext, T> scalar;
      scale_list->mutable_data<T>(context.GetPlace());
      scalar(device_ctx, scale_list, static_cast<T>(0));
      // NOTE(review): the counter is zero-filled as T here while the
      // "range_abs_max" branch treats it as int — confirm the intended type.
      auto* iter = context.Output<framework::Tensor>("OutCurrentIter");
      iter->mutable_data<T>(context.GetPlace());
      scalar(device_ctx, iter, static_cast<T>(0));
    } else if (quantize_type == "range_abs_max") {
      auto* moving_scale = context.Input<framework::Tensor>("InMovingScale");
      if (is_test) {
        scale = moving_scale->data<T>()[0];
      } else {
        auto* it = context.Input<framework::Tensor>("InCurrentIter");
        auto* iter = context.Output<framework::Tensor>("OutCurrentIter");
        // Snapshot the incoming counter and the previous maximum before any
        // output buffer is touched: the outputs may share storage with the
        // inputs, and the reduction below overwrites OutMovingScale.
        const int last_iter = it->data<int>()[0];
        const T last_max = moving_scale->data<T>()[0];
        int* current_iter = iter->mutable_data<int>(platform::CPUPlace());

        auto* scale_list = context.Output<framework::Tensor>("OutScales");
        auto* saving_scale =
            context.Output<framework::Tensor>("OutMovingScale");
        auto scale_out = framework::EigenVector<T>::Flatten(*saving_scale);
        scale_out.device(dev) = raw_in.abs().maximum();

        T* saved = saving_scale->mutable_data<T>(platform::CPUPlace());
        const T cur_scale = saved[0];
        // FindRangeAbsMax expects out_scale[0] to hold the previous maximum;
        // restore it, otherwise it would always compare the batch maximum
        // against itself.
        saved[0] = last_max;
        scale = FindRangeAbsMax(scale_list, saving_scale, cur_scale,
                                window_size, last_iter);
        saved[0] = scale;
        current_iter[0] = last_iter + 1;
      }
    } else if (quantize_type == "moving_average_abs_max") {
      auto* moving_scale = context.Input<framework::Tensor>("InMovingScale");
      if (is_test) {
        scale = moving_scale->data<T>()[0];
      } else {
        auto* saving_scale =
            context.Output<framework::Tensor>("OutMovingScale");
        auto scale_out = framework::EigenVector<T>::Flatten(*saving_scale);
        scale_out.device(dev) = raw_in.abs().maximum();
        scale = saving_scale->mutable_data<T>(platform::CPUPlace())[0];
        scale = FindMovingAverageAbsMmax(
            const_cast<framework::Tensor*>(moving_scale), saving_scale, scale);
        saving_scale->mutable_data<T>(platform::CPUPlace())[0] = scale;
      }
    }

    // Clip X into [-scale, scale] and store the result in Out. Transform is
    // qualified because the using-declaration for it only appears after this
    // class.
    platform::Transform<DeviceContext> trans;
    trans(context.template device_context<DeviceContext>(), in->data<T>(),
          in->data<T>() + in->numel(), tensor->mutable_data<T>(in->place()),
          ClipFunctor<T>(-scale, scale));

    // Scale and round in place. A scale of exactly zero would make the
    // factor infinite and turn the (all-zero) clipped values into NaN, so the
    // factor is forced to zero in that case.
    auto eigen_out = framework::EigenVector<T>::Flatten(*tensor);
    auto eigen_in = framework::EigenVector<T>::Flatten(*tensor);
    const T factor =
        (scale == static_cast<T>(0)) ? static_cast<T>(0) : bin_cnt / scale;
    eigen_out.device(dev) = (factor * eigen_in).round();
  }
};
using
platform
::
Transform
;
template
<
typename
DeviceContext
,
typename
T
>
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录