未验证 提交 973dab86 编写于 作者: W whs 提交者: GitHub

Enhance the quantization API with some new features (#50816)

上级 262358e8
...@@ -37,6 +37,7 @@ class LinearQuanterDequanter(Layer): ...@@ -37,6 +37,7 @@ class LinearQuanterDequanter(Layer):
@staticmethod @staticmethod
def from_quanter(quanter): def from_quanter(quanter):
assert quanter is not None
return LinearQuanterDequanter( return LinearQuanterDequanter(
LinearQuanter.from_quanter(quanter), LinearQuanter.from_quanter(quanter),
LinearDequanter.from_quanter(quanter), LinearDequanter.from_quanter(quanter),
...@@ -208,6 +209,8 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta): ...@@ -208,6 +209,8 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
self, quanter_name self, quanter_name
), f"{quanter_name} is not attribute of current layer." ), f"{quanter_name} is not attribute of current layer."
quanter = getattr(self, quanter_name) quanter = getattr(self, quanter_name)
if quanter is None:
return None
quanter = LinearQuanterDequanter.from_quanter(quanter) quanter = LinearQuanterDequanter.from_quanter(quanter)
setattr(self, quanter_name, quanter) setattr(self, quanter_name, quanter)
self._sub_layers[quanter_name] = quanter self._sub_layers[quanter_name] = quanter
...@@ -224,6 +227,7 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta): ...@@ -224,6 +227,7 @@ class ConvertibleQuantedLayer(Layer, metaclass=abc.ABCMeta):
assert not self.converted, "The model should be converted only once." assert not self.converted, "The model should be converted only once."
for weight_name, quanter_name in self.weights_to_quanters(): for weight_name, quanter_name in self.weights_to_quanters():
qdq = self._convert_quanter_to_qdq(quanter_name) qdq = self._convert_quanter_to_qdq(quanter_name)
if qdq is not None:
self._quant_weights(weight_name, qdq._quanter) self._quant_weights(weight_name, qdq._quanter)
qdq._quanter = None qdq._quanter = None
qdq._sub_layers['_quanter'] = None qdq._sub_layers['_quanter'] = None
......
...@@ -89,6 +89,7 @@ class QuantConfig(object): ...@@ -89,6 +89,7 @@ class QuantConfig(object):
self._type2config = {} self._type2config = {}
self._model = None self._model = None
self._qat_layer_mapping = copy.deepcopy(DEFAULT_QAT_LAYER_MAPPINGS) self._qat_layer_mapping = copy.deepcopy(DEFAULT_QAT_LAYER_MAPPINGS)
self._customized_qat_layer_mapping = dict()
self._customized_leaves = [] self._customized_leaves = []
...@@ -259,6 +260,7 @@ class QuantConfig(object): ...@@ -259,6 +260,7 @@ class QuantConfig(object):
source, paddle.nn.Layer source, paddle.nn.Layer
), "The target layer should be a subclass of paddle.nn.qat.Layer" ), "The target layer should be a subclass of paddle.nn.qat.Layer"
self._qat_layer_mapping[source] = target self._qat_layer_mapping[source] = target
self._customized_qat_layer_mapping[source] = target
def add_customized_leaf(self, layer_type: type): def add_customized_leaf(self, layer_type: type):
r""" r"""
...@@ -296,7 +298,11 @@ class QuantConfig(object): ...@@ -296,7 +298,11 @@ class QuantConfig(object):
def _get_qat_layer(self, layer: Layer): def _get_qat_layer(self, layer: Layer):
q_config = self._get_config_by_layer(layer) q_config = self._get_config_by_layer(layer)
return self.qat_layer_mappings[type(layer)](layer, q_config)
target_type = self._customized_qat_layer_mapping.get(
type(layer), self.qat_layer_mappings.get(type(layer))
)
return target_type(layer, q_config)
def _has_observer_config(self, layer: Layer): def _has_observer_config(self, layer: Layer):
r""" r"""
...@@ -397,6 +403,7 @@ class QuantConfig(object): ...@@ -397,6 +403,7 @@ class QuantConfig(object):
for child in model.children(): for child in model.children():
layer_prefix = child.full_name() layer_prefix = child.full_name()
config = self._layer2config.get(model, self.global_config) config = self._layer2config.get(model, self.global_config)
config = self._type2config.get(type(child), config) config = self._type2config.get(type(child), config)
config = self._prefix2config.get(layer_prefix, config) config = self._prefix2config.get(layer_prefix, config)
if config is not None: if config is not None:
...@@ -413,11 +420,11 @@ class QuantConfig(object): ...@@ -413,11 +420,11 @@ class QuantConfig(object):
return self._details_helper(self._model) return self._details_helper(self._model)
def _details_helper(self, layer: Layer): def _details_helper(self, layer: Layer):
extra_lines = []
sublayer_lines = [] sublayer_lines = []
for name, sublayer in layer.named_children(): for name, sublayer in layer.named_children():
sublayer_str = self._details_helper(sublayer) sublayer_str = self._details_helper(sublayer)
sublayer_str = self._addindent(sublayer_str, 2) sublayer_str = self._addindent(sublayer_str, 2)
if sublayer in self._layer2config:
sublayer_lines.append( sublayer_lines.append(
'(' '('
+ name + name
...@@ -428,11 +435,6 @@ class QuantConfig(object): ...@@ -428,11 +435,6 @@ class QuantConfig(object):
) )
final_str = layer.__class__.__name__ + '(' final_str = layer.__class__.__name__ + '('
if extra_lines:
if len(extra_lines) > 1:
final_str += '\n ' + '\n '.join(extra_lines) + '\n'
elif len(extra_lines) == 1:
final_str += extra_lines[0]
if sublayer_lines: if sublayer_lines:
final_str += '\n ' + '\n '.join(sublayer_lines) + '\n' final_str += '\n ' + '\n '.join(sublayer_lines) + '\n'
......
...@@ -12,9 +12,11 @@ ...@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle
from paddle import _legacy_C_ops from paddle import _legacy_C_ops
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.fluid.framework import _varbase_creator from paddle.fluid.framework import _varbase_creator
from paddle.framework import ParamAttr from paddle.framework import ParamAttr, core
from paddle.nn.initializer import Constant from paddle.nn.initializer import Constant
from paddle.utils import unique_name from paddle.utils import unique_name
...@@ -142,7 +144,7 @@ class FakeQuanterWithAbsMaxObserverLayer(BaseQuanter): ...@@ -142,7 +144,7 @@ class FakeQuanterWithAbsMaxObserverLayer(BaseQuanter):
) )
self._accum.stop_gradient = True self._accum.stop_gradient = True
def forward(self, input): def dynamic_forward(self, input):
attrs = ( attrs = (
'moving_rate', 'moving_rate',
self._moving_rate, self._moving_rate,
...@@ -181,6 +183,46 @@ class FakeQuanterWithAbsMaxObserverLayer(BaseQuanter): ...@@ -181,6 +183,46 @@ class FakeQuanterWithAbsMaxObserverLayer(BaseQuanter):
return out return out
def static_forward(self, input):
    """Append the fake quantize-dequantize op for ``input`` in static-graph mode.

    The input is first validated with ``check_variable_and_dtype`` (only
    ``float32`` is listed as accepted). A new non-persistable output variable
    named ``<input.name>.quantized.dequantized`` is created, and a
    ``fake_quantize_dequantize_moving_average_abs_max`` op is appended through
    ``self._helper``. ``self._scale`` is wired as both the op's input and
    output scale; ``self._state`` and ``self._accum`` are wired in only while
    ``self.training`` is true.

    Args:
        input: The static-graph variable to be fake-quantized.

    Returns:
        The newly created output variable of the appended op.
    """
    check_variable_and_dtype(
        input, 'input', ['float32'], "FakeQuantMovingAverageAbsMax"
    )

    is_training = self.training
    op_attrs = {
        'moving_rate': self._moving_rate,
        'bit_length': self._bit_length,
        'is_test': not is_training,
    }

    op_inputs = {"X": [input], "InScale": [self._scale]}
    quant_out = self._helper.create_variable(
        name=f"{input.name}.quantized.dequantized",
        dtype=input.dtype,
        type=core.VarDesc.VarType.LOD_TENSOR,
        persistable=False,
        stop_gradient=False,
    )
    op_outputs = {"Out": [quant_out], "OutScale": [self._scale]}

    # The moving-average bookkeeping variables are only attached in training.
    if is_training:
        op_inputs.update(InState=[self._state], InAccum=[self._accum])
        op_outputs.update(OutState=[self._state], OutAccum=[self._accum])

    self._helper.append_op(
        type="fake_quantize_dequantize_moving_average_abs_max",
        inputs=op_inputs,
        outputs=op_outputs,
        attrs=op_attrs,
    )
    return quant_out
def forward(self, input):
    """Run the quanter on ``input`` using the implementation for the current mode.

    Delegates to ``dynamic_forward`` when
    ``paddle.framework.in_dynamic_mode()`` is true and to ``static_forward``
    otherwise, returning whatever the selected implementation returns.
    """
    run = (
        self.dynamic_forward
        if paddle.framework.in_dynamic_mode()
        else self.static_forward
    )
    return run(input)
def bit_length(self): def bit_length(self):
return self._bit_length return self._bit_length
......
...@@ -84,11 +84,13 @@ class Quantization(object, metaclass=abc.ABCMeta): ...@@ -84,11 +84,13 @@ class Quantization(object, metaclass=abc.ABCMeta):
def _convert_to_quant_layers(self, model: Layer, config: QuantConfig): def _convert_to_quant_layers(self, model: Layer, config: QuantConfig):
replaced = {} replaced = {}
for name, child in model.named_children(): for name, child in model.named_children():
if config._is_quantifiable(child): if (
if type(child) not in config.qat_layer_mappings: config._is_quantifiable(child)
self._convert_to_quant_layers(child, config) and type(child) in config.qat_layer_mappings
else: ):
replaced[name] = config._get_qat_layer(child) replaced[name] = config._get_qat_layer(child)
else:
self._convert_to_quant_layers(child, config)
for key, value in replaced.items(): for key, value in replaced.items():
model._sub_layers[key] = value model._sub_layers[key] = value
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""The quantizer layers should be traced by paddle.jit.save function."""
import os
import tempfile
import unittest
import paddle
from paddle.quantization import QAT, QuantConfig
from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
from paddle.quantization.quanters.abs_max import (
FakeQuanterWithAbsMaxObserverLayer,
)
from paddle.vision.models import resnet18
class TestPTQ(unittest.TestCase):
    """Check that QAT fake quanters are preserved by ``paddle.jit.save``.

    A ResNet-18 is quantized with ``QAT`` (despite the class name, no PTQ
    object is involved), saved as an inference model, reloaded in static-graph
    mode, and the number of fake quantize-dequantize ops in the loaded program
    is compared with the number of quanter layers in the dygraph model.
    """

    def setUp(self):
        """Create a temporary directory (under the CWD) for the saved model."""
        self.temp_dir = tempfile.TemporaryDirectory(dir="./")
        self.path = os.path.join(self.temp_dir.name, 'ptq')

    def tearDown(self):
        """Remove the temporary directory and everything saved into it."""
        self.temp_dir.cleanup()

    def _get_model_for_qat(self):
        """Build a ResNet-18 in train mode and quantize its Conv2D layers.

        Returns:
            A ``(quant_model, qat)`` tuple: the quantized model and the
            ``QAT`` object that produced it.
        """
        observer = FakeQuanterWithAbsMaxObserver()
        model = resnet18()
        model.train()
        # No global quanters: only Conv2D layers get activation/weight quanters.
        q_config = QuantConfig(activation=None, weight=None)
        q_config.add_type_config(
            paddle.nn.Conv2D, activation=observer, weight=observer
        )
        qat = QAT(q_config)
        quant_model = qat.quantize(model)
        return quant_model, qat

    def _count_layers(self, model, layer_type):
        """Return how many layers in ``model.sublayers(True)`` are ``layer_type``."""
        return sum(
            1
            for sublayer in model.sublayers(True)
            if isinstance(sublayer, layer_type)
        )

    def test_trace(self):
        """Saved static program must contain one quant op per dygraph quanter."""
        quant_model, _ = self._get_model_for_qat()
        image = paddle.rand([1, 3, 32, 32], dtype="float32")
        quantizer_count_in_dygraph = self._count_layers(
            quant_model, FakeQuanterWithAbsMaxObserverLayer
        )
        save_path = os.path.join(self.path, 'int8_infer')
        paddle.jit.save(quant_model, save_path, [image])
        print(f"quant_model is saved into {save_path}")

        paddle.enable_static()
        # Always switch back to dynamic mode, even when loading or the
        # assertion fails; otherwise static mode would remain enabled for
        # any test that runs later in the same process.
        try:
            exe = paddle.static.Executor(paddle.CPUPlace())
            main_program = paddle.static.Program()
            startup_program = paddle.static.Program()
            with paddle.static.program_guard(main_program, startup_program):
                [
                    inference_program,
                    feed_target_names,
                    fetch_targets,
                ] = paddle.static.load_inference_model(save_path, exe)

            quantizer_count_in_static_model = sum(
                1
                for _op in inference_program.global_block().ops
                if _op.type
                == "fake_quantize_dequantize_moving_average_abs_max"
            )
            self.assertEqual(
                quantizer_count_in_dygraph, quantizer_count_in_static_model
            )
        finally:
            paddle.disable_static()
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册