Commit 61bc016c (unverified)
Authored Aug 23, 2022 by zhaoyingli; committed by GitHub on Aug 23, 2022
[AutoParallel] Add Quant Pass (#44877)
* add quant pass
Parent: 9ccdb5fa

Showing 14 changed files with 588 additions and 63 deletions (+588 -63)
paddle/fluid/framework/distributed_strategy.proto                        +10   -0
python/paddle/distributed/auto_parallel/completion.py                     +6   -0
python/paddle/distributed/auto_parallel/operators/common.py               +2   -0
python/paddle/distributed/auto_parallel/parallelizer_v2.py               +21   -3
python/paddle/distributed/auto_parallel/reshard.py                        +9   -3
python/paddle/distributed/fleet/base/distributed_strategy.py             +54   -0
python/paddle/distributed/passes/__init__.py                              +1   -0
python/paddle/distributed/passes/auto_parallel_quantization.py          +258   -0
python/paddle/distributed/passes/auto_parallel_recompute.py               +4   -4
python/paddle/fluid/contrib/slim/quantization/__init__.py                 +2   -1
python/paddle/fluid/contrib/slim/quantization/imperative/qat.py          +17   -4
python/paddle/fluid/contrib/slim/quantization/imperative/utils.py        +23  -48
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt          +1   -0
python/paddle/fluid/tests/unittests/auto_parallel/test_quantization.py  +180   -0
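What the commit enables end to end: quantization-aware training (QAT) can now be switched on for auto-parallel static-graph training through fleet.DistributedStrategy. A minimal sketch, assuming the Engine-based flow exercised by the new test_quantization.py at the bottom of this diff (model, loss, input specs, and dataset are user-provided placeholders, not part of this commit):

    import paddle
    import paddle.distributed.fleet as fleet
    from paddle.distributed.auto_parallel.engine import Engine

    paddle.enable_static()

    strategy = fleet.DistributedStrategy()
    strategy.semi_auto = True
    strategy.qat = True                  # new flag introduced by this commit
    strategy.qat_configs = {             # new QatConfig options introduced by this commit
        'channel_wise_abs_max': True,
        'weight_bits': 8,
        'activation_bits': 8,
        'not_quant_pattern': ['skip_quant'],
    }

    # model, loss, inputs_spec, labels_spec, dataset: supplied by the user,
    # exactly as in test_quantization.py below.
    engine = Engine(model, inputs_spec, labels_spec, strategy=strategy)
    engine.prepare(optimizer=paddle.optimizer.AdamW(learning_rate=1e-5), loss=loss)
    engine.fit(train_data=dataset, batch_size=8)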
paddle/fluid/framework/distributed_strategy.proto

@@ -184,6 +184,14 @@ message TensorParallelConfig {
   optional int32 tensor_init_seed = 2 [ default = -1 ];
 }
 
+message QatConfig {
+  optional bool channel_wise_abs_max = 1 [ default = true ];
+  optional int32 weight_bits = 2 [ default = 8 ];
+  optional int32 activation_bits = 3 [ default = 8 ];
+  repeated string not_quant_pattern = 4;
+  optional string algo = 5;
+}
+
 enum TableType {
   PS_SPARSE_TABLE = 0;
   PS_DENSE_TABLE = 1;

@@ -327,6 +335,7 @@ message DistributedStrategy {
   optional bool heter_ccl_mode = 38 [ default = false ];
   optional bool is_fl_ps_mode = 39 [ default = false ];
   optional bool with_coordinator = 40 [ default = false ];
+  optional bool qat = 41 [ default = false ];
 
   optional RecomputeConfig recompute_configs = 101;
   optional AMPConfig amp_configs = 102;

@@ -344,6 +353,7 @@ message DistributedStrategy {
   optional TrainerDescConfig trainer_desc_configs = 114;
   repeated TableParameter downpour_table_param = 115;
   optional FsClientParameter fs_client_param = 116;
+  optional QatConfig qat_configs = 117;
 
   optional BuildStrategy build_strategy = 201;
   optional ExecutionStrategy execution_strategy = 202;
python/paddle/distributed/auto_parallel/completion.py

@@ -940,6 +940,12 @@ class Completer:
                         core.op_proto_and_checker_maker.OpRole.Forward):
                     appended_grad_times += 1
+                if int(op.attr('op_role')) == int(
+                        int(core.op_proto_and_checker_maker.OpRole.Backward)
+                        | int(core.op_proto_and_checker_maker.OpRole.Loss)):
+                    assert op.type == "fill_constant"
+                    break
+
             # complete the annotation of grad op (xxx_grad op or sum op)
             # xxx_grad op will have a corresponding forward op in grad_op_id_to_op_id
             grad_op = ops[idx]
python/paddle/distributed/auto_parallel/operators/common.py

@@ -245,6 +245,8 @@ def is_parameter_related(varname, block):
         varname = varname[:varname.index(".subprog_")]
     if ".cast_fp" in varname:
         varname = varname[:varname.index(".cast_fp")]
+    if ".quantized" in varname:
+        varname = varname[:varname.index(".quantized")]
     assert block.has_var(varname)
     var = block.var(varname)
     return var.is_parameter
python/paddle/distributed/auto_parallel/parallelizer_v2.py

@@ -66,8 +66,8 @@ class Parallelizer:
                 serial_loss)
             # Apply pre optimization passes
             time0 = time.time()
-            self._apply_pre_optimization(serial_main_program,
-                                         serial_startup_program, serial_loss,
-                                         serial_optimizer, params_grads)
+            params_grads = self._apply_pre_optimization(
+                serial_main_program, serial_startup_program, serial_loss,
+                serial_optimizer, params_grads)
             self._logger.info(
                 "within parallel apply_pre_optimization time: {}, mode {}".

@@ -162,6 +162,22 @@ class Parallelizer:
                                 optimizer, params_grads):
         if self._strategy is None:
             return
 
+        # apply quantization pass
+        # The pass can be applied when mode must be 'train'
+        if self._mode == 'train' and self._strategy.qat:
+            config = copy.deepcopy(self._strategy.qat_configs)
+            config["dist_context"] = self._dist_context
+            config["params_grads"] = params_grads
+            auto_parallel_quantization_pass = new_pass(
+                "auto_parallel_quantization", config)
+            auto_parallel_quantization_pass.apply([main_program],
+                                                  [startup_program],
+                                                  self._pass_context)
+            main_program = self._pass_context.get_attr("main_program")
+            startup_program = self._pass_context.get_attr("startup_program")
+            params_grads = self._pass_context.get_attr("params_grads")
+
         # apply amp pass
         # FIXME we disenable amp for eval since it has a little bug with
         # eval program and which will be fixed in future

@@ -195,6 +211,8 @@ class Parallelizer:
                                           [startup_program],
                                           self._pass_context)
 
+        return main_program, startup_program, params_grads
+
     def _apply_post_optimization(self, main_program, startup_program, rank,
                                  params_grads):
         if self._strategy is None:
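The hunk above wires the pass into the Parallelizer; outside of it, the same pass can be driven through the generic pass registry. A minimal sketch, assuming a dist_context and params_grads already produced by the auto-parallel planner, and assuming PassContext is importable alongside new_pass (variable names mirror the code above but are otherwise illustrative):

    from paddle.distributed.passes import new_pass, PassContext

    config = {
        "channel_wise_abs_max": True,
        "weight_bits": 8,
        "activation_bits": 8,
        "not_quant_pattern": ["skip_quant"],
        "dist_context": dist_context,    # assumed: planner-produced DistributedContext
        "params_grads": params_grads,    # assumed: (param, grad) pairs from backward
    }
    pass_context = PassContext()
    quant_pass = new_pass("auto_parallel_quantization", config)
    quant_pass.apply([main_program], [startup_program], pass_context)

    # The pass rebuilds the program, so the rewritten objects are read back
    # from the pass context rather than mutated in place.
    main_program = pass_context.get_attr("main_program")
    startup_program = pass_context.get_attr("startup_program")
    params_grads = pass_context.get_attr("params_grads")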
python/paddle/distributed/auto_parallel/reshard.py

@@ -685,7 +685,8 @@ class Remover:
                     block._remove_op(idx)
 
     @staticmethod
-    def remove_no_need_vars(auto_parallel_main_prog, dist_params_grads):
+    def remove_no_need_vars(auto_parallel_main_prog, dist_params_grads,
+                            feed_var_names):
         """Remove no need vars in the main program"""
         for block_idx, block in enumerate(auto_parallel_main_prog.blocks):
             remove_vars = set()

@@ -731,7 +732,7 @@ class Remover:
                     idx += 1
 
             for var in remove_vars:
-                if block.vars[var].is_data:
+                if var in feed_var_names:
                     continue
                 block._remove_var(var)

@@ -743,7 +744,12 @@ class Remover:
                                                  rank_id)
         Resharder.change_while_op_input_and_output(auto_parallel_main_prog,
                                                    dist_context)
-        Remover.remove_no_need_vars(auto_parallel_main_prog, dist_params_grads)
+        # 'feed_var_names' cannot be removed from auto_parallel_main_prog
+        feed_var_names = []
+        for var in sum(list(dist_context.serial_feed_vars.values()), []):
+            feed_var_names.append(var.name)
+        Remover.remove_no_need_vars(auto_parallel_main_prog, dist_params_grads,
+                                    feed_var_names)
 
     @staticmethod
     def remove_no_need_in_startup(auto_parallel_main_prog,
python/paddle/distributed/fleet/base/distributed_strategy.py

@@ -1991,6 +1991,60 @@ class DistributedStrategy(object):
         else:
             print("WARNING: auto-search should have value of bool type")
 
+    @property
+    def qat(self):
+        """
+        Indicating whether we are using quantization training
+        Default Value: False
+        """
+        return self.strategy.qat
+
+    @qat.setter
+    def qat(self, flag):
+        if isinstance(flag, bool):
+            self.strategy.qat = flag
+        else:
+            print("WARNING: qat should have value of bool type")
+
+    @property
+    def qat_configs(self):
+        """
+        Set quantization training configurations. In general, qat has several configurable
+        settings that can be configured through a dict.
+
+        **Notes**:
+            channel_wise_abs_max(bool): Whether to use `per_channel` quantization training. Default is True.
+
+            weight_bits(int): quantization bit number for weight. Default is 8.
+
+            activation_bits(int): quantization bit number for activation. Default is 8.
+
+            not_quant_pattern(list[str]): When the skip pattern is detected in an op's name scope,
+                the corresponding op will not be quantized.
+
+            algo(str): Other quantization training algorithm.
+
+        Examples:
+
+          .. code-block:: python
+
+            import paddle.distributed.fleet as fleet
+
+            strategy = fleet.DistributedStrategy()
+            strategy.qat = True
+            strategy.qat_configs = {
+                "channel_wise_abs_max": True,
+                "weight_bits": 8,
+                "activation_bits": 8,
+                "not_quant_pattern": ['skip_quant']}
+        """
+        return get_msg_dict(self.strategy.qat_configs)
+
+    @qat_configs.setter
+    def qat_configs(self, configs):
+        check_configs_key(self.strategy.qat_configs, configs, "qat_configs")
+        assign_configs_value(self.strategy.qat_configs, configs)
+
     @property
     def heter_ccl_mode(self):
         """
python/paddle/distributed/passes/__init__.py

@@ -19,6 +19,7 @@ from .auto_parallel_sharding import *
 from .auto_parallel_amp import *
 from .auto_parallel_fp16 import *
 from .auto_parallel_recompute import *
+from .auto_parallel_quantization import *
 from .auto_parallel_data_parallel_optimization import *
 from .cpp_pass import *
 import os
python/paddle/distributed/passes/auto_parallel_quantization.py  (new file, mode 100644)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
from paddle.fluid import core, framework
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.contrib.slim.quantization import utils
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPassV2
from paddle.fluid.contrib.slim.quantization import AddQuantDequantPassV2
from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass
from paddle.distributed.auto_parallel.dist_attribute import OperatorDistributedAttribute, TensorDistributedAttribute

from .pass_base import PassBase, register_pass

TRANSFORM_PASS_OP_TYPES = utils._weight_supported_quantizable_op_type
QUANT_DEQUANT_PASS_OP_TYPES = utils._act_supported_quantizable_op_type


def _node_id(node):
    return (node.node.graph_id(), node.node.id())


@register_pass("auto_parallel_quantization")
class QuantizationPass(PassBase):

    def __init__(self):
        super(QuantizationPass, self).__init__()
        self.set_attr("dist_context", None)
        self.set_attr("params_grads", None)

    def _check_self(self):
        if self.get_attr("dist_context") is None:
            return False
        if self.get_attr("params_grads") is None:
            return False
        return True

    def _check_conflict(self, other_pass):
        return True

    def _apply_single_impl(self, main_program, startup_program, context):
        dist_context = self.get_attr("dist_context")
        params_grads = self.get_attr("params_grads")

        # TODO: scope and place will be removed,
        # cause params should be initialized by engine module.
        scope = paddle.static.global_scope()
        place = paddle.fluid.CUDAPlace(ParallelEnv().dev_id)

        # 1. Program convert to Graph, and this pass is only for train mode
        main_graph = framework.IrGraph(core.Graph(main_program.desc),
                                       for_test=False)

        # 2. Prepare inputs
        transform_pass_ops = []
        quant_dequant_ops = []
        quantize_op_types = [
            'conv2d', 'depthwise_conv2d', 'mul', 'matmul', 'matmul_v2'
        ]
        for op_type in quantize_op_types:
            if op_type in TRANSFORM_PASS_OP_TYPES:
                transform_pass_ops.append(op_type)
            elif op_type in QUANT_DEQUANT_PASS_OP_TYPES:
                quant_dequant_ops.append(op_type)

        weight_quantize_type = "channel_wise_abs_max" if self.get_attr(
            'channel_wise_abs_max') else "abs_max"

        # 3. Add quant op for ops which have parameters
        transform_pass = QuantizationTransformPassV2(
            scope=scope,
            place=place,
            weight_bits=self.get_attr('weight_bits'),
            activation_bits=self.get_attr('activation_bits'),
            skip_pattern=self.get_attr('not_quant_pattern'),
            activation_quantize_type="moving_average_abs_max",
            quantizable_op_type=transform_pass_ops,
            weight_quantize_type=weight_quantize_type,
            weight_quantize_func=None,
            act_quantize_func=None,
            weight_preprocess_func=None,
            act_preprocess_func=None,
            optimizer_func=None,
            executor=None)
        transform_pass.apply(main_graph)

        # 4. Add quant op for ops which don't have parameter
        quant_dequant_pass = AddQuantDequantPassV2(
            scope=scope,
            place=place,
            quant_bits=self.get_attr('activation_bits'),
            skip_pattern=self.get_attr('not_quant_pattern'),
            quantizable_op_type=quant_dequant_ops)
        quant_dequant_pass.apply(main_graph)

        # 5. Gather quantitative information for the output
        out_scale_training_pass = OutScaleForTrainingPass(scope=scope,
                                                          place=place)
        out_scale_training_pass.apply(main_graph)

        # 6. Convert Graph back to Program
        quant_program = main_graph.to_program()

        # 7. get new prams_grads from quant_program
        new_params_grads = []
        for param, grad in params_grads:
            if param.name not in quant_program.global_block().vars:
                continue
            new_param = quant_program.global_block().vars[param.name]
            new_grad = quant_program.global_block().vars[grad.name]
            new_params_grads.append((new_param, new_grad))

        # 8. complete distributed attribution
        # NOTE: hack implement, upgrading soon
        for ib, block in enumerate(quant_program.blocks):
            # recover origin ops' dist_attr and set quant ops' dist_attr
            qat_offset = 0
            for ip, quant_op in enumerate(block.ops):
                quant_op_dist_attr = OperatorDistributedAttribute()

                if "quantize" in quant_op.type or \
                        quant_op.type == "moving_average_abs_max_scale":

                    input_name = quant_op.desc.input('X')[0]
                    if "quantize" in input_name:
                        input_name = input_name[:input_name.index(".quantized")]

                    if quant_op.type == "moving_average_abs_max_scale":
                        consume_op = main_program.blocks[ib].vars[input_name].op
                    else:
                        consume_op = main_program.blocks[ib].ops[ip - qat_offset]
                    consume_op_dist_attr = dist_context.get_dist_op_for_program(
                        consume_op).dist_attr
                    ref_process_mesh = consume_op_dist_attr.process_mesh

                    if input_name in consume_op_dist_attr.outputs_dist_attrs:
                        consume_input_dist_attr = consume_op_dist_attr.outputs_dist_attrs[
                            input_name]
                    else:
                        consume_input_dist_attr = consume_op_dist_attr.inputs_dist_attrs[
                            input_name]

                    quant_op_dist_attr.impl_idx = 0
                    quant_op_dist_attr.impl_type = "default"
                    quant_op_dist_attr.process_mesh = ref_process_mesh
                    quant_op_dist_attr.set_input_dist_attr(
                        quant_op.desc.input('X')[0], consume_input_dist_attr)

                    for slot_name in quant_op.desc.input_names():
                        if slot_name == "X":
                            continue
                        for in_name in quant_op.desc.input(slot_name):
                            input_var = block.vars[in_name]
                            tensor_dist_attr = TensorDistributedAttribute()
                            tensor_dist_attr.process_mesh = ref_process_mesh
                            tensor_dist_attr.dims_mapping = [-1]
                            dist_context.set_tensor_dist_attr_for_program(
                                input_var, tensor_dist_attr)
                            quant_op_dist_attr.set_input_dist_attr(
                                in_name, tensor_dist_attr)

                    for slot_name in quant_op.desc.output_names():
                        output_name = quant_op.desc.output(slot_name)[0]
                        output_var = block.vars[output_name]
                        if slot_name == "Y":
                            dist_context.set_tensor_dist_attr_for_program(
                                output_var, consume_input_dist_attr)
                            quant_op_dist_attr.set_output_dist_attr(
                                output_name, consume_input_dist_attr)
                        else:
                            tensor_dist_attr = TensorDistributedAttribute()
                            tensor_dist_attr.process_mesh = ref_process_mesh
                            tensor_dist_attr.dims_mapping = [-1]
                            dist_context.set_tensor_dist_attr_for_program(
                                output_var, tensor_dist_attr)
                            quant_op_dist_attr.set_output_dist_attr(
                                output_name, tensor_dist_attr)

                    quant_op._set_attr("op_device", "")
                    qat_offset += 1

                else:

                    origin_op = main_program.blocks[ib].ops[ip - qat_offset]
                    quant_op.desc.set_original_id(origin_op.desc.original_id())
                    dist_origin_op = dist_context.get_dist_op_for_program(
                        origin_op)
                    assert dist_origin_op is not None, "origin op must have dist attr."

                    origin_op_dist_attr = dist_origin_op.dist_attr
                    quant_op_dist_attr.impl_idx = origin_op_dist_attr.impl_idx
                    quant_op_dist_attr.impl_type = origin_op_dist_attr.impl_type
                    quant_op_dist_attr.process_mesh = origin_op_dist_attr.process_mesh

                    for idx, input_name in enumerate(quant_op.input_arg_names):
                        origin_input_name = origin_op.input_arg_names[idx]
                        origin_input_dist_attr = origin_op_dist_attr.inputs_dist_attrs[
                            origin_input_name]
                        quant_op_dist_attr.set_input_dist_attr(
                            input_name, origin_input_dist_attr)

                        if input_name not in main_program.blocks[ib].vars:
                            origin_input_var = main_program.blocks[ib].vars[
                                origin_input_name]
                            origin_in_tensor_dist_attr = dist_context.get_dist_tensor_for_program(
                                origin_input_var).dist_attr
                            quant_input_var = block.vars[input_name]
                            dist_context.set_tensor_dist_attr_for_program(
                                quant_input_var, origin_in_tensor_dist_attr)

                    for idx, output_name in enumerate(quant_op.output_arg_names):
                        origin_output_name = origin_op.output_arg_names[idx]
                        origin_output_dist_attr = origin_op_dist_attr.outputs_dist_attrs[
                            origin_output_name]
                        quant_op_dist_attr.set_output_dist_attr(
                            output_name, origin_output_dist_attr)

                        if output_name not in main_program.blocks[ib].vars:
                            origin_output_var = main_program.blocks[ib].vars[
                                origin_output_name]
                            origin_out_tensor_dist_attr = dist_context.get_dist_tensor_for_program(
                                origin_output_var).dist_attr
                            quant_output_var = block.vars[output_name]
                            dist_context.set_tensor_dist_attr_for_program(
                                quant_output_var, origin_out_tensor_dist_attr)

                dist_context.set_op_dist_attr_for_program(quant_op,
                                                          quant_op_dist_attr)

            # recover vars' dist_attr
            for name, dst_var in block.vars.items():
                if name in main_program.blocks[ib].vars:
                    src_var = main_program.blocks[ib].vars[name]
                    dist_tensor = dist_context.get_dist_tensor_for_program(
                        src_var)
                    if not dist_tensor:
                        continue
                    dist_context.set_tensor_dist_attr_for_program(
                        dst_var, dist_tensor.dist_attr)

        context.set_attr("main_program", quant_program)
        context.set_attr("startup_program", startup_program)
        context.set_attr("params_grads", new_params_grads)
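The core mechanism of this new pass is a Program-to-IrGraph round trip: the static Program is converted to a graph, the existing slim quantization graph passes insert the fake quant/dequant ops, and the graph is converted back to a Program whose dist_attrs are then patched up in step 8. A minimal standalone sketch of just that round trip, using only the conversion calls seen above on a toy program (the fc layer is illustrative, not part of the commit):

    import paddle
    from paddle.fluid import core, framework

    paddle.enable_static()

    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        x = paddle.static.data(name="x", shape=[8, 16], dtype="float32")
        out = paddle.static.nn.fc(x, size=4)

    # Program -> IrGraph: the slim passes (QuantizationTransformPassV2,
    # AddQuantDequantPassV2, OutScaleForTrainingPass) operate on this graph form.
    graph = framework.IrGraph(core.Graph(main_program.desc), for_test=False)

    # ... graph-level quantization passes would be applied here ...

    # IrGraph -> Program: the rebuilt program is what the pass publishes back
    # through context.set_attr("main_program", ...).
    quant_program = graph.to_program()
    print(len(quant_program.global_block().ops))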
python/paddle/distributed/passes/auto_parallel_recompute.py

@@ -236,14 +236,14 @@ class RecomputePass(PassBase):
     def _check_conflict(self, other_pass):
         return True
 
-    def _apply_single_impl(self, main_programs, startup_programs, context):
+    def _apply_single_impl(self, main_program, startup_program, context):
         checkpoints = self.get_attr("checkpoints")
         loss = self.get_attr("loss")
         no_grad_set = self.get_attr("no_grad_set")
         self._dist_context = self.get_attr("dist_context")
-        main_block = main_programs.global_block()
-        no_grad_set_name = _get_stop_gradients(main_programs, no_grad_set)
+        main_block = main_program.global_block()
+        no_grad_set_name = _get_stop_gradients(main_program, no_grad_set)
         # get op_path which is related to loss
         op_path = _find_op_path_(main_block, [loss], [], no_grad_set_name)

@@ -373,7 +373,7 @@ class RecomputePass(PassBase):
         ckpt_ops_dict[fwd_op_id][0] = False
 
         main_block._sync_with_cpp()
-        main_programs._sync_with_cpp()
+        main_program._sync_with_cpp()
 
     def reset_op_dist_attr(self, op, var_name_dict):
         op_dist_attr = self._dist_context.get_op_dist_attr_for_program(op)
python/paddle/fluid/contrib/slim/quantization/__init__.py

@@ -25,7 +25,8 @@ from .post_training_quantization import *
 from . import imperative
 from .imperative import *
 
-__all__ = quantization_pass.__all__
+__all__ = []
+__all__ += quantization_pass.__all__
 __all__ += quant_int8_mkldnn_pass.__all__
 __all__ += quant2_int8_mkldnn_pass.__all__
 __all__ += post_training_quantization.__all__
...
python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
浏览文件 @
61bc016c
...
@@ -42,6 +42,17 @@ _logger = get_logger(__name__,
...
@@ -42,6 +42,17 @@ _logger = get_logger(__name__,
fmt
=
'%(asctime)s-%(levelname)s: %(message)s'
)
fmt
=
'%(asctime)s-%(levelname)s: %(message)s'
)
def
lazy_import_fleet
(
layer_name_map
,
fake_quant_input_layers
):
from
paddle.distributed
import
fleet
layer_name_map
[
'ColumnParallelLinear'
]
=
fleet
.
meta_parallel
.
parallel_layers
.
mp_layers
.
ColumnParallelLinear
layer_name_map
[
'RowParallelLinear'
]
=
fleet
.
meta_parallel
.
parallel_layers
.
mp_layers
.
RowParallelLinear
fake_quant_input_layers
.
append
(
fleet
.
meta_parallel
.
RowParallelLinear
)
fake_quant_input_layers
.
append
(
fleet
.
meta_parallel
.
ColumnParallelLinear
)
return
layer_name_map
,
fake_quant_input_layers
class
ImperativeQuantAware
(
object
):
class
ImperativeQuantAware
(
object
):
"""
"""
Applying quantization aware training (QAT) to the dgraph model.
Applying quantization aware training (QAT) to the dgraph model.
...
@@ -300,13 +311,15 @@ class ImperativeQuantizeInputs(object):
...
@@ -300,13 +311,15 @@ class ImperativeQuantizeInputs(object):
Please refer to the args of ImperativeQuantAware.
Please refer to the args of ImperativeQuantAware.
"""
"""
super
(
ImperativeQuantizeInputs
,
self
).
__init__
()
super
(
ImperativeQuantizeInputs
,
self
).
__init__
()
self
.
layer_name_map
,
self
.
fake_quant_input_layers
=
lazy_import_fleet
(
utils
.
layer_name_map
,
utils
.
fake_quant_input_layers
)
self
.
_quantizable_layer_type
=
tuple
(
self
.
_quantizable_layer_type
=
tuple
(
utils
.
layer_name_map
[
layer
]
if
layer
in
self
.
layer_name_map
[
layer
]
if
layer
in
utils
.
layer_name_map
else
layer
for
layer
in
quantizable_layer_type
)
self
.
layer_name_map
else
layer
for
layer
in
quantizable_layer_type
)
for
layer
in
self
.
_quantizable_layer_type
:
for
layer
in
self
.
_quantizable_layer_type
:
assert
not
isinstance
(
layer
,
str
)
\
assert
not
isinstance
(
layer
,
str
)
\
and
layer
in
utils
.
fake_quant_input_layers
,
\
and
layer
in
self
.
fake_quant_input_layers
,
\
"%s is unspported to be quantized."
%
layer
"%s is unspported to be quantized."
%
layer
quantize_type
=
{
quantize_type
=
{
...
@@ -383,7 +396,7 @@ class ImperativeQuantizeInputs(object):
...
@@ -383,7 +396,7 @@ class ImperativeQuantizeInputs(object):
def
_get_input_quantized_layer
(
self
,
layer
):
def
_get_input_quantized_layer
(
self
,
layer
):
quant_layer_name
=
None
quant_layer_name
=
None
for
key
,
value
in
utils
.
layer_name_map
.
items
():
for
key
,
value
in
self
.
layer_name_map
.
items
():
if
isinstance
(
layer
,
value
):
if
isinstance
(
layer
,
value
):
quant_layer_name
=
'Quantized'
+
key
quant_layer_name
=
'Quantized'
+
key
break
break
...
...
python/paddle/fluid/contrib/slim/quantization/imperative/utils.py

@@ -16,63 +16,38 @@ import math
 import numpy as np
 
 import paddle
-from paddle.distributed import fleet
 import paddle.nn.quant.quant_layers as quant_layers
 
 from ..utils import _get_op_input_var_names, _get_op_output_var_names, _get_output_name_index, _get_input_name_index
 
 layer_name_map = {
     'Conv2DTranspose':
     paddle.nn.Conv2DTranspose,
-    'Conv2D':
-    paddle.nn.Conv2D,
-    'Linear':
-    paddle.nn.Linear,
-    'AdaptiveAvgPool2D':
-    paddle.nn.AdaptiveAvgPool2D,
-    'AdaptiveMaxPool2D':
-    paddle.nn.AdaptiveMaxPool2D,
-    'AvgPool2D':
-    paddle.nn.AvgPool2D,
-    'MaxPool2D':
-    paddle.nn.MaxPool2D,
-    'Hardswish':
-    paddle.nn.Hardswish,
-    'LeakyReLU':
-    paddle.nn.LeakyReLU,
-    'PReLU':
-    paddle.nn.PReLU,
-    'ReLU':
-    paddle.nn.ReLU,
-    'ReLU6':
-    paddle.nn.ReLU6,
-    'Sigmoid':
-    paddle.nn.Sigmoid,
-    'Softmax':
-    paddle.nn.Softmax,
-    'Swish':
-    paddle.nn.Swish,
-    'Tanh':
-    paddle.nn.Tanh,
-    'Hardswish':
-    paddle.nn.Hardswish,
-    'BatchNorm':
-    paddle.nn.BatchNorm,
-    'GroupNorm':
-    paddle.nn.GroupNorm,
-    'LayerNorm':
-    paddle.nn.LayerNorm,
-    'ColumnParallelLinear':
-    fleet.meta_parallel.parallel_layers.mp_layers.ColumnParallelLinear,
-    'RowParallelLinear':
-    fleet.meta_parallel.parallel_layers.mp_layers.RowParallelLinear
+    'Conv2D': paddle.nn.Conv2D,
+    'Linear': paddle.nn.Linear,
+    'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
+    'AdaptiveMaxPool2D': paddle.nn.AdaptiveMaxPool2D,
+    'AvgPool2D': paddle.nn.AvgPool2D,
+    'MaxPool2D': paddle.nn.MaxPool2D,
+    'Hardswish': paddle.nn.Hardswish,
+    'LeakyReLU': paddle.nn.LeakyReLU,
+    'PReLU': paddle.nn.PReLU,
+    'ReLU': paddle.nn.ReLU,
+    'ReLU6': paddle.nn.ReLU6,
+    'Sigmoid': paddle.nn.Sigmoid,
+    'Softmax': paddle.nn.Softmax,
+    'Swish': paddle.nn.Swish,
+    'Tanh': paddle.nn.Tanh,
+    'Hardswish': paddle.nn.Hardswish,
+    'BatchNorm': paddle.nn.BatchNorm,
+    'GroupNorm': paddle.nn.GroupNorm,
+    'LayerNorm': paddle.nn.LayerNorm,
 }
 
 # Apply fake quant for the inputs of these layers
 fake_quant_input_layers = [
-    paddle.nn.Conv2D, fleet.meta_parallel.RowParallelLinear, paddle.nn.Linear,
-    fleet.meta_parallel.ColumnParallelLinear, paddle.nn.Conv2DTranspose
+    paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.Conv2DTranspose
 ]
 
 # Apply fake quant for the output of these layers
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt

@@ -65,4 +65,5 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
   py_test_modules(test_process_mesh_v2 MODULES test_process_mesh_v2)
   py_test_modules(test_dist_attr_v2 MODULES test_dist_attr_v2)
   py_test_modules(test_lr_grad_clip MODULES test_lr_grad_clip)
+  py_test_modules(test_quantization MODULES test_quantization)
 endif()
python/paddle/fluid/tests/unittests/auto_parallel/test_quantization.py  (new file, mode 100644)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import sys
import numpy as np

import paddle
import paddle.distributed.fleet as fleet
import paddle.distributed.auto_parallel as auto

from paddle.distributed.auto_parallel.engine import Engine
from paddle.distributed.auto_parallel.utils import print_program_with_dist_attr

sys.path.append("..")
import auto_parallel_gpt_model as modeling
from auto_parallel_gpt_model import GPTModel, GPTForPretraining, GPTPretrainingCriterion

paddle.enable_static()


class FakeDataset:

    def __init__(self, num_samples, sequence_len, vocab_size):
        self.num_samples = num_samples
        self.sequence_len = sequence_len
        self.vocab_size = vocab_size

    def __getitem__(self, idx):
        tokens = np.random.randint(self.vocab_size, size=self.sequence_len)
        position_ids = np.arange(self.sequence_len)
        attention_mask = np.tril(np.ones(self.sequence_len)).reshape(
            (1, self.sequence_len, self.sequence_len)).astype(np.float32)
        labels = np.random.randint(self.vocab_size, size=self.sequence_len)
        loss_mask = np.ones(self.sequence_len).astype(np.float32)
        return tokens, position_ids, attention_mask, labels, loss_mask

    def __len__(self):
        return self.num_samples


def apply_pass():
    dist_strategy = fleet.DistributedStrategy()
    dist_strategy.semi_auto = True
    dist_strategy.qat = True
    dist_strategy.qat_configs = {
        'channel_wise_abs_max': True,
        'weight_bits': 8,
        'activation_bits': 8,
        'not_quant_pattern': ['skip_quant'],
    }
    return dist_strategy


def create_data_holder(batch_size, sequence_len):
    tokens = paddle.static.InputSpec(name="tokens",
                                     shape=[batch_size, sequence_len],
                                     dtype='int64')
    position_ids = paddle.static.InputSpec(name="position_ids",
                                           shape=[batch_size, sequence_len],
                                           dtype='int64')
    attention_mask = paddle.static.InputSpec(
        name="attention_mask",
        shape=[batch_size, 1, sequence_len, sequence_len],
        dtype='float32')
    labels = paddle.static.InputSpec(name="labels",
                                     shape=[batch_size, sequence_len],
                                     dtype='int64')
    loss_mask = paddle.static.InputSpec(name="loss_mask",
                                        shape=[batch_size, sequence_len],
                                        dtype='float32')
    return [tokens, position_ids, attention_mask], [labels, loss_mask]


def get_gpt_model():
    modeling.init_global()
    modeling._global_parallel_strategy = "serial"
    modeling._global_process_mesh = auto.ProcessMesh(mesh=[0])

    gpt = GPTModel(vocab_size=1000,
                   hidden_size=64,
                   num_hidden_layers=2,
                   num_attention_heads=8,
                   intermediate_size=256,
                   hidden_act="gelu",
                   hidden_dropout_prob=0.0,
                   attention_probs_dropout_prob=0.0,
                   max_position_embeddings=1024,
                   type_vocab_size=1,
                   initializer_range=0.02,
                   pad_token_id=0,
                   eos_token_id=7,
                   bos_token_id=0,
                   eol_token_id=3)
    model = GPTForPretraining(gpt,
                              vocab_size=1000,
                              hidden_size=64,
                              initializer_range=0.02)
    criterion = GPTPretrainingCriterion()
    return model, criterion


class TestQuantizationPass(unittest.TestCase):

    def test_qat_pass(self):

        batch_size = 8
        batch_num = 10
        sequence_len = 512
        vocab_size = 1000

        strategy = apply_pass()
        model, loss = get_gpt_model()
        opt = paddle.optimizer.AdamW(learning_rate=0.00001)
        inputs_spec, labels_spec = create_data_holder(batch_size=batch_size,
                                                      sequence_len=sequence_len)

        engine = Engine(model, inputs_spec, labels_spec, strategy=strategy)
        engine.prepare(optimizer=opt, loss=loss)

        dataset = FakeDataset(batch_size * batch_num, sequence_len, vocab_size)
        engine.fit(train_data=dataset, batch_size=batch_size)

        self.check_program(engine.main_program)

    def check_program(self, program):

        quantizable_op_and_inputs = {'matmul_v2': ['X', 'Y']}
        quantizable_grad_op_inputs = {'matmul_v2_grad': ['X', 'Y']}

        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                is_quntized = False
                if op.type in quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        if ".quantized" in arg_name:
                            is_quntized = True
                if not is_quntized:
                    continue

                # check forward
                if op.type in quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        assert arg_name.endswith('.quantized.dequantized')
                        quantized_ops.add(arg_name)

            for op in block.ops:
                is_quntized = False
                if op.type in quantizable_grad_op_inputs:
                    for pname in quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        if ".quantized" in arg_name:
                            is_quntized = True
                if not is_quntized:
                    continue

                # check backward
                if op.type in quantizable_grad_op_inputs:
                    for pname in quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        assert arg_name.endswith('.quantized.dequantized')
                        assert arg_name in quantized_ops


if __name__ == "__main__":
    unittest.main()