Unverified commit 00c85a74, authored by cc, committed by GitHub

[Dygraph QAT] Save all scales to target ops and Move quant layers to paddle.nn.quant (#33871)

* Save all scales to target ops
* Move quant layers to paddle.nn.quant
Parent ea1a0d45
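For context, a minimal dygraph QAT round trip that exercises both changes might look like the sketch below. The model, quantize-type arguments, and save path are illustrative assumptions, not part of this commit:

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

# A hypothetical model; any dygraph model with Conv2D/Linear sublayers works.
model = paddle.vision.models.LeNet()

quanter = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
quanter.quantize(model)  # wraps sublayers with the quant layers now in paddle.nn.quant

# ... train the quantized model as usual ...

# Saving triggers _gather_scales, which writes the collected input/output
# scales onto the target ops of the exported inference program.
quanter.save_quantized_model(
    layer=model,
    path='./qat_lenet',  # hypothetical output path
    input_spec=[paddle.static.InputSpec(
        shape=[None, 1, 28, 28], dtype='float32')])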
@@ -14,9 +14,6 @@
from __future__ import print_function
from . import quant_nn
from .quant_nn import *
from . import qat
from .qat import *
@@ -33,7 +30,6 @@ from . import ptq_registry
from .ptq_registry import *
__all__ = []
__all__ += quant_nn.__all__
__all__ += qat.__all__
__all__ += ptq.__all__
__all__ += ptq_config.__all__
@@ -20,6 +20,7 @@ import os
import warnings
import paddle
import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid import dygraph, core, framework, unique_name
from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.param_attr import ParamAttr
@@ -28,7 +29,6 @@ from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.fluid.io import load_inference_model, save_inference_model
from paddle.fluid.log_helper import get_logger
from .. import quantization_pass
from . import quant_nn
from . import utils
__all__ = ['ImperativeQuantAware']
@@ -39,7 +39,7 @@ _logger = get_logger(
class ImperativeQuantAware(object):
"""
Applying quantization aware training (QAT) to dgraph model.
Applying quantization aware training (QAT) to the dygraph model.
"""
def __init__(self,
@@ -329,12 +329,12 @@ class ImperativeQuantizeInputs(object):
"The layer %s is unsupported to be quantized." \
% layer.full_name()
return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs)
return quant_layers.__dict__[quant_layer_name](layer, **self._kwargs)
class ImperativeQuantizeOutputs(object):
"""
Calculate the output scales for some layers.
Calculate the output scales for target layers.
"""
def __init__(self, moving_rate=0.9):
@@ -371,11 +371,11 @@ class ImperativeQuantizeOutputs(object):
utils.find_parent_layer_and_sub_name(model, cur_name)
if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)):
cur_quant_layer = quant_nn.FakeQuantMAOutputScaleLayer(
cur_quant_layer = quant_layers.FakeQuantMAOutputScaleLayer(
cur_layer, self._moving_rate)
else:
cur_quant_layer = quant_nn.MAOutputScaleLayer(cur_layer,
self._moving_rate)
cur_quant_layer = quant_layers.MAOutputScaleLayer(
cur_layer, self._moving_rate)
setattr(parent_layer, sub_name, cur_quant_layer)
@@ -433,7 +433,7 @@ class ImperativeQuantizeOutputs(object):
model_filename=model_filename,
params_filename=params_filename))
self._save_output_scale(infer_program, scope)
self._gather_scales(infer_program, scope)
self._set_skip_quant_attr(infer_program)
@@ -455,36 +455,79 @@ class ImperativeQuantizeOutputs(object):
"""
flag = False
if isinstance(layer, dygraph.Layer):
# exclude fake_quant ops in quant_nn file
# exclude fake_quant ops in quant_layers file
if utils.is_leaf_layer(layer) and \
not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
flag = True
# consider QuantizedConv2D and QuantizedLinear ops
if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
flag = True
if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
flag = True
return flag
def _save_output_scale(self, program, scope):
def _gather_scales(self, program, scope):
"""
Save all output scales to the corresponding ops in static
inference program and delete 'moving_average_abs_max_scale' ops.
Get all scales from fake ops, save them into the corresponding ops
and delete all moving_average_abs_max_scale ops.
"""
for block in program.blocks:
for op in block.ops:
if op.type == "moving_average_abs_max_scale":
in_var_name = op.input('X')[0]
out_var_name = op.output('Out')[0]
out_scale_name = op.output('OutScale')[0]
out_scale = utils.load_variable_data(scope, out_scale_name)
previous_op = utils.find_previous_op(block, in_var_name)
previous_op._set_attr("out_threshold", float(out_scale))
next_ops = utils.find_next_ops(block, out_var_name)
for next_op in next_ops:
next_op._rename_input(out_var_name, in_var_name)
def _gather_input_scale():
target_ops = []
skip_ops = utils.fake_quantize_dequantize_op_types + \
["moving_average_abs_max_scale"]
for block in program.blocks:
for op in block.ops:
if op.type not in skip_ops:
target_ops.append(op)
for op in target_ops:
for in_var_name in utils._get_op_input_var_names(op):
previous_op = utils.find_previous_op(op.block, in_var_name)
if previous_op is not None and \
("quantize_dequantize" in previous_op.type or \
previous_op.type == "moving_average_abs_max_scale"):
scale_name = previous_op.output('OutScale')[0]
in_scale = utils.load_variable_data(scope, scale_name)
in_scale = utils.fp_numpy_to_naive(in_scale)
argname, index = utils._get_input_name_index(
op, in_var_name)
op._set_attr(argname + str(index) + "_threshold",
in_scale)
def _gather_output_scale():
target_ops = []
for block in program.blocks:
for op in block.ops:
if op.type == "moving_average_abs_max_scale":
target_ops.append(op)
for op in target_ops:
in_var_name = op.input('X')[0]
out_var_name = op.output('Out')[0]
block = op.block
previous_op = utils.find_previous_op(block, in_var_name)
next_ops = utils.find_next_ops(block, out_var_name)
out_scale_name = op.output('OutScale')[0]
out_scale = utils.load_variable_data(scope, out_scale_name)
out_scale = utils.fp_numpy_to_naive(out_scale)
if previous_op.type != "feed":
argname, index = utils._get_output_name_index(previous_op,
in_var_name)
previous_op._set_attr(argname + str(index) + "_threshold",
out_scale)
previous_op._set_attr("out_threshold", out_scale)
for next_op in next_ops:
next_op._rename_input(out_var_name, in_var_name)
_gather_input_scale()
_gather_output_scale()
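As a worked illustration of the attribute naming used above (the 'Input' and 'Filter' slot names are assumptions taken from a typical conv2d op, not something this diff defines):

def threshold_attr_name(argname, index):
    # Mirrors the convention in _gather_input_scale:
    # ('Input', 0) -> 'Input0_threshold'
    return argname + str(index) + "_threshold"

# For a conv2d op, the gathered scales would land in attributes such as:
assert threshold_attr_name('Input', 0) == 'Input0_threshold'
assert threshold_attr_name('Filter', 0) == 'Filter0_threshold'
# _gather_output_scale additionally sets the plain 'out_threshold' attribute.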
def _set_skip_quant_attr(self, program):
"""
@@ -16,8 +16,12 @@ import math
import numpy as np
import paddle
import paddle.nn.quant.quant_layers as quant_layers
from . import quant_nn
from ..quantization_pass import _get_op_input_var_names
from ..quantization_pass import _get_op_output_var_names
from ..quantization_pass import _get_output_name_index
from ..quantization_pass import _get_input_name_index
layer_name_map = {
'Conv2D': paddle.nn.Conv2D,
@@ -54,13 +58,15 @@ fake_quant_output_layers = [
]
fake_quant_leaf_layers = [
quant_nn.FakeQuantAbsMax,
quant_nn.FakeQuantChannelWiseAbsMax,
quant_nn.FakeQuantMovingAverageAbsMax,
quant_nn.MovingAverageAbsMaxScale,
quant_layers.FakeQuantAbsMax,
quant_layers.FakeQuantChannelWiseAbsMax,
quant_layers.FakeQuantMovingAverageAbsMax,
quant_layers.MovingAverageAbsMaxScale,
]
fake_quant_wrap_layers = [quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear]
fake_quant_wrap_layers = [
quant_layers.QuantizedConv2D, quant_layers.QuantizedLinear
]
# The weight format of these layers is Cin * Cout * H * W
spec_channel_axis_layers = [paddle.nn.Conv2D, paddle.nn.Conv2DTranspose]
@@ -94,6 +100,7 @@ def find_previous_op(block, var_name):
for op in block.ops:
if var_name in op.output_arg_names:
return op
return None
def find_next_ops(block, var_name):
@@ -244,3 +251,10 @@ def cal_kl_scaling_factor(hist, abs_max, bits):
break
min_kl_index = starting_iter
return (min_kl_index + 0.5) * bin_width
def fp_numpy_to_naive(x_np):
if x_np.size == 1:
return float(x_np)
else:
return x_np.tolist()
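A quick sketch of fp_numpy_to_naive's behavior on the two scale shapes it sees (the values are made up):

import numpy as np

fp_numpy_to_naive(np.array([0.5], dtype='float32'))        # -> 0.5, per-tensor scale
fp_numpy_to_naive(np.array([0.5, 0.25], dtype='float32'))  # -> [0.5, 0.25], per-channel scales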
@@ -141,12 +141,21 @@ _channelwise_quant_axis1_ops = ['conv2d_transpose', 'mul']
def _get_op_input_var_names(op):
""" """
"""
Get the input var names of the op.
Args:
op(IrNode, Operator): the input op.
Returns:
input_var_names, or [] if the op type is not registered.
"""
assert isinstance(op, (IrNode, Operator)), \
"The input op should be IrNode or Operator."
var_names = []
op_name = op.name() if isinstance(op, IrNode) \
else op.type
if op_name not in _op_real_in_out_name:
return []
name_list = _op_real_in_out_name[op_name][0]
for name in name_list:
var_name = op.input(name)
@@ -163,6 +172,9 @@ def _get_input_name_index(op, input_var_name):
"The input op should be IrNode or Operator."
op_name = op.name() if isinstance(op, IrNode) \
else op.type
if op_name not in _op_real_in_out_name:
return None
res = None
for argname in _op_real_in_out_name[op_name][0]:
var_names = op.input(argname)
@@ -179,6 +191,9 @@ def _get_op_output_var_names(op):
var_names = []
op_name = op.name() if isinstance(op, IrNode) \
else op.type
if op_name not in _op_real_in_out_name:
return []
name_list = _op_real_in_out_name[op_name][1]
for name in name_list:
var_name = op.output(name)
@@ -195,6 +210,9 @@ def _get_output_name_index(op, output_var_name):
"The input op should be IrNode or Operator."
op_name = op.name() if isinstance(op, IrNode) \
else op.type
if op_name not in _op_real_in_out_name:
return None
name_list = _op_real_in_out_name[op_name][1]
res = None
for name in name_list:
@@ -31,7 +31,7 @@ from paddle.fluid.dygraph.container import Sequential
from paddle.nn import Linear, Conv2D, Softmax
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedConv2D
from paddle.nn.quant.quant_layers import QuantizedConv2D
from imperative_test_utils import fix_model_dict, ImperativeLenet
@@ -20,7 +20,7 @@ import paddle
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.contrib.slim.quantization.imperative import quant_nn
import paddle.nn.quant.quant_layers as quant_layers
paddle.enable_static()
@@ -45,7 +45,7 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
fc_tmp = fluid.layers.fc(image, size=10, act='softmax')
out_scale = quant_nn.MovingAverageAbsMaxScale(
out_scale = quant_layers.MovingAverageAbsMaxScale(
name=fc_tmp.name, dtype=fc_tmp.dtype)
fc_tmp_1 = out_scale(fc_tmp)
cross_entropy = fluid.layers.softmax_with_cross_entropy(fc_tmp,
@@ -21,5 +21,6 @@ from .functional_layers import reshape  # noqa: F401
from .functional_layers import transpose # noqa: F401
from .functional_layers import concat # noqa: F401
from .functional_layers import flatten # noqa: F401
from .quant_layers import QuantStub # noqa: F401
__all__ = []
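With this re-export in place, the stub is reachable from the public namespace. A usage sketch (the alias path is the only thing this hunk guarantees; how QuantStub is constructed is not shown in this diff):

from paddle.nn.quant import QuantStub  # resolves via the re-export above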