From 8967a66a21758680fda8f6d658ed627e471429ae Mon Sep 17 00:00:00 2001 From: XGZhang <46363693+XGZhang11@users.noreply.github.com> Date: Wed, 18 Aug 2021 14:18:23 +0800 Subject: [PATCH] support quantization of conv2d_transpose (#34547) --- .../slim/quantization/imperative/qat.py | 101 +++++++++++++---- .../slim/quantization/imperative/utils.py | 19 +++- .../contrib/slim/tests/test_imperative_qat.py | 10 +- .../tests/test_imperative_qat_user_defined.py | 19 ++++ python/paddle/nn/quant/quant_layers.py | 107 ++++++++++++++++++ tools/sampcd_processor.py | 1 + 6 files changed, 225 insertions(+), 32 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index b8c0e47e9bb..6208b43c9e9 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -42,17 +42,18 @@ class ImperativeQuantAware(object): Applying quantization aware training (QAT) to the dgraph model. """ - def __init__(self, - quantizable_layer_type=['Conv2D', 'Linear'], - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - weight_bits=8, - activation_bits=8, - moving_rate=0.9, - weight_preprocess_layer=None, - act_preprocess_layer=None, - weight_quantize_layer=None, - act_quantize_layer=None): + def __init__( + self, + quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max', + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + weight_preprocess_layer=None, + act_preprocess_layer=None, + weight_quantize_layer=None, + act_quantize_layer=None): """ The constructor for ImperativeQuantAware. @@ -212,9 +213,44 @@ class ImperativeQuantAware(object): the out_scale value of outputs would be calculated. Args: - model(fluid.dygraph.Layer): the model to be quantized. + model(paddle.nn.Layer): the model to be quantized. Returns: None + + Examples: + .. code-block:: python + + import paddle + from paddle.fluid.contrib.slim.quantization \ + import ImperativeQuantAware + + class ImperativeModel(paddle.nn.Layer): + def __init__(self): + super(ImperativeModel, self).__init__() + # self.linear_0 would skip the quantization. + self.linear_0 = paddle.nn.Linear(784, 400) + self.linear_0.skip_quant = True + + # self.linear_1 would not skip the quantization. + self.linear_1 = paddle.nn.Linear(400, 10) + self.linear_1.skip_quant = False + + def forward(self, inputs): + x = self.linear_0(inputs) + x = self.linear_1(inputs) + return x + + model = ImperativeModel() + imperative_qat = ImperativeQuantAware( + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max') + + # Add the fake quant logical. + # The original model will be rewrite. + # + # There is only one Layer(self.linear1) would be added the + # fake quant logical. + imperative_qat.quantize(model) """ assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." @@ -232,17 +268,18 @@ class ImperativeQuantizeInputs(object): logic both for activation inputs and weight inputs. 
""" - def __init__(self, - quantizable_layer_type=['Conv2D', 'Linear'], - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - weight_bits=8, - activation_bits=8, - moving_rate=0.9, - weight_preprocess_layer=None, - act_preprocess_layer=None, - weight_quantize_layer=None, - act_quantize_layer=None): + def __init__( + self, + quantizable_layer_type=['Conv2D', 'Linear', 'Conv2DTranspose'], + weight_quantize_type='abs_max', + activation_quantize_type='moving_average_abs_max', + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + weight_preprocess_layer=None, + act_preprocess_layer=None, + weight_quantize_layer=None, + act_quantize_layer=None): """ The constructor for ImperativeQuantizeInputs. @@ -303,6 +340,18 @@ class ImperativeQuantizeInputs(object): } def apply(self, model): + """ + Quantize the weights and activations to calculate for specific + layers. + + Args: + model(paddle.nn.Layer): The target model which would + calculate the input quantization scale. + + Returns: + None + """ + assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." @@ -354,7 +403,7 @@ class ImperativeQuantizeOutputs(object): output scales for specific layers in the dygraph model. Args: - model(fluid.dygraph.Layer): The target model which would be + model(paddle.nn.Layer): The target model which would be calculate the output quantization scale. Returns: @@ -544,7 +593,9 @@ class ImperativeQuantizeOutputs(object): 1. the type of input op should be conv2d, depthwise_conv2d or matmul 2. the previous ops of the input op are not fake_quantize_dequantize ops """ - target_op_types = ["conv2d", "depthwise_conv2d", "matmul"] + target_op_types = [ + "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose" + ] if in_op.type not in target_op_types: return False diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index a9d52c5a87a..009ce372b4f 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -24,6 +24,7 @@ from ..quantization_pass import _get_output_name_index from ..quantization_pass import _get_input_name_index layer_name_map = { + 'Conv2DTranspose': paddle.nn.Conv2DTranspose, 'Conv2D': paddle.nn.Conv2D, 'Linear': paddle.nn.Linear, 'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D, @@ -46,8 +47,9 @@ layer_name_map = { } # Apply fake quant for the inputs of these layers -# TODO (jc): support paddle.nn.Conv2DTranspose -fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] +fake_quant_input_layers = [ + paddle.nn.Conv2D, paddle.nn.Linear, paddle.nn.Conv2DTranspose +] # Apply fake quant for the output of these layers # TODO(jc): fix the problem of adding duplicate fake_quant ops @@ -65,7 +67,8 @@ fake_quant_leaf_layers = [ ] fake_quant_wrap_layers = [ - quant_layers.QuantizedConv2D, quant_layers.QuantizedLinear + quant_layers.QuantizedConv2D, quant_layers.QuantizedLinear, + quant_layers.QuantizedConv2DTranspose ] # The weight format of these layers is Cin * Cout * H * W @@ -84,9 +87,9 @@ fake_quantize_dequantize_op_types = [ def load_variable_data(scope, var_name): - ''' + """ Load variable value from scope - ''' + """ var_node = scope.find_var(var_name) assert var_node is not None, \ "Can not find " + var_name + " in the scope." @@ -120,6 +123,12 @@ def find_parent_layer_and_sub_name(model, name): the sub_name of the layer. 
For example, if name is 'block_1/convbn_1/conv_1', the parent layer is 'block_1/convbn_1' and the sub_name is `conv_1`. + Args: + model(paddle.nn.Layer): the model to be quantized. + name(string): the name of a layer + + Returns: + parent_layer, subname """ assert isinstance(model, paddle.nn.Layer), \ "The model must be the instance of paddle.nn.Layer." diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index 14fa291ee07..677ccb52e24 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -28,10 +28,10 @@ from paddle.fluid import core from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.dygraph.container import Sequential -from paddle.nn import Linear, Conv2D, Softmax +from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.nn.quant.quant_layers import QuantizedConv2D +from paddle.nn.quant.quant_layers import QuantizedConv2D, QuantizedConv2DTranspose from imperative_test_utils import fix_model_dict, ImperativeLenet @@ -75,6 +75,12 @@ class TestImperativeQat(unittest.TestCase): data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') quant_conv1(fluid.dygraph.to_variable(data)) + conv_transpose = Conv2DTranspose(4, 6, (3, 3)) + quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) + x_var = paddle.uniform( + (2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0) + quant_conv_transpose(x_var) + seed = 1 np.random.seed(seed) fluid.default_main_program().random_seed = seed diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py index 621213beb31..270e8ee566a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_user_defined.py @@ -28,6 +28,7 @@ from paddle.nn import Sequential from paddle.fluid.dygraph import Conv2D from paddle.fluid.dygraph import Pool2D from paddle.fluid.dygraph import Linear +from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose from paddle.fluid.log_helper import get_logger os.environ["CPU_NUM"] = "1" @@ -100,6 +101,19 @@ class CustomQAT(nn.Layer): return x +class ModelForConv2dT(nn.Layer): + def __init__(self, num_classes=10): + super(ModelForConv2dT, self).__init__() + self.features = nn.Conv2DTranspose(4, 6, (3, 3)) + self.fc = Linear(input_dim=600, output_dim=num_classes) + + def forward(self, inputs): + x = self.features(inputs) + x = paddle.flatten(x, 1) + x = self.fc(x) + return x + + class ImperativeLenet(paddle.nn.Layer): def __init__(self, num_classes=10, classifier_activation='softmax'): super(ImperativeLenet, self).__init__() @@ -168,6 +182,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase): imperative_qat.quantize(lenet) adam = Adam(learning_rate=0.001, parameters=lenet.parameters()) dynamic_loss_rec = [] + #for CI coverage + conv_transpose = ModelForConv2dT() + imperative_qat.quantize(conv_transpose) + x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) 
+ conv_transpose(x_var) def train(model): adam = Adam(learning_rate=0.001, parameters=model.parameters()) diff --git a/python/paddle/nn/quant/quant_layers.py b/python/paddle/nn/quant/quant_layers.py index 5573683ebd0..040b04f5e7b 100644 --- a/python/paddle/nn/quant/quant_layers.py +++ b/python/paddle/nn/quant/quant_layers.py @@ -31,6 +31,7 @@ __all__ = [ 'FakeQuantMovingAverageAbsMax', 'FakeQuantChannelWiseAbsMax', 'QuantizedConv2D', + 'QuantizedConv2DTranspose', 'QuantizedLinear', 'MovingAverageAbsMaxScale', 'MAOutputScaleLayer', @@ -481,6 +482,112 @@ class QuantizedConv2D(layers.Layer): data_format=self._data_format) +class QuantizedConv2DTranspose(layers.Layer): + """ + The computational logic of QuantizedConv2DTranspose is the same with Conv2DTranspose. + The only difference is that its inputs are all fake quantized. + + Examples: + .. code-block:: python + import paddle + import paddle.nn as nn + from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose + x_var = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1., max=1.) + conv = nn.Conv2DTranspose(4, 6, (3, 3)) + conv_quantized = QuantizedConv2DTranspose(conv) + y_quantized = conv_quantized(x_var) + y_var = conv(x_var) + y_quantized_np = y_quantized.numpy() + y_np = y_var.numpy() + print(y_np.shape, y_quantized_np.shape) + # (2, 6, 10, 10), (2, 6, 10, 10) + """ + + def __init__(self, + layer, + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + weight_quantize_type='abs_max', + activation_quantize_type='abs_max', + weight_pre_layer=None, + act_pre_layer=None, + weight_quant_layer=None, + act_quant_layer=None): + r""" + Constructor. + + The arguments are the same as ImperativeQuantAware. + """ + super(QuantizedConv2DTranspose, self).__init__() + # For Conv2DTranspose + self._groups = getattr(layer, '_groups') + self._stride = getattr(layer, '_stride') + self._padding = getattr(layer, '_padding') + self._output_padding = getattr(layer, 'output_padding') + self._dilation = getattr(layer, '_dilation') + self._data_format = getattr(layer, '_data_format') + self.weight = getattr(layer, 'weight') + self.bias = getattr(layer, 'bias') + # For FakeQuant + self._conv2d_transpose_quant_axis = 1 + if weight_quant_layer is not None: + self._fake_quant_weight = weight_quant_layer() + else: + self._fake_quant_weight = _get_fake_quant_type( + weight_quantize_type, + name=self.weight.name, + moving_rate=moving_rate, + quant_bits=weight_bits, + dtype=self._dtype, + quant_on_weight=True, + channel_num=self.weight.shape[ + self._conv2d_transpose_quant_axis], + quant_axis=self._conv2d_transpose_quant_axis) + if act_quant_layer is not None: + self._fake_quant_input = act_quant_layer() + else: + self._fake_quant_input = _get_fake_quant_type( + activation_quantize_type, + name=layer.full_name(), + moving_rate=moving_rate, + quant_bits=activation_bits, + dtype=self._dtype, + quant_on_weight=False) + + self._act_preprocess = act_pre_layer( + ) if act_pre_layer is not None else None + self._weight_preprocess = weight_pre_layer( + ) if weight_pre_layer is not None else None + + def forward(self, input, output_size=None): + if self._act_preprocess is not None: + input = self._act_preprocess(input) + quant_input = self._fake_quant_input(input) + + weight = self.weight + if self._weight_preprocess is not None: + weight = self._weight_preprocess(self.weight) + quant_weight = self._fake_quant_weight(weight) + + if output_size is None: + output_padding = self._output_padding + else: + output_padding = 0 + + return F.conv2d_transpose( + quant_input, + 
quant_weight, + bias=self.bias, + padding=self._padding, + output_padding=output_padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + output_size=output_size, + data_format=self._data_format) + + class QuantizedLinear(layers.Layer): """ The computational logic of QuantizedLinear is the same with Linear. diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index 3ec12c11a70..d8cb70c9dd1 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -440,6 +440,7 @@ def get_filenames(full_test=False): ''' global whl_error import paddle + import paddle.fluid.contrib.slim.quantization whl_error = [] if full_test: get_full_api_from_pr_spec() -- GitLab
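
The patch's docstrings and tests each demonstrate one piece of the new behaviour; below is a minimal end-to-end sketch, assuming a PaddlePaddle build that already includes this change, showing the two ways a Conv2DTranspose layer can now be fake-quantized: wrapping a single layer with QuantizedConv2DTranspose, or letting ImperativeQuantAware rewrite a whole model (whose default quantizable_layer_type now includes 'Conv2DTranspose').

# A minimal sketch, assuming a PaddlePaddle build that already contains this
# patch (i.e. QuantizedConv2DTranspose exists and 'Conv2DTranspose' is part of
# the default quantizable_layer_type).
import paddle
import paddle.nn as nn
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose

x = paddle.uniform((2, 4, 8, 8), dtype='float32', min=-1.0, max=1.0)

# 1) Wrap a single layer directly: the wrapper fake-quantizes the input and
#    the weight, then runs the usual conv2d_transpose, so the output shape
#    matches the float layer: (2, 6, 10, 10).
conv_t = nn.Conv2DTranspose(4, 6, (3, 3))
quant_conv_t = QuantizedConv2DTranspose(conv_t)
y_quant = quant_conv_t(x)
y_float = conv_t(x)
print(y_quant.shape, y_float.shape)

# 2) Or let ImperativeQuantAware rewrite a whole model in place; layers whose
#    type is listed in quantizable_layer_type (now including Conv2DTranspose)
#    are replaced by their fake-quantized counterparts.
class Net(nn.Layer):
    def __init__(self):
        super(Net, self).__init__()
        self.deconv = nn.Conv2DTranspose(4, 6, (3, 3))

    def forward(self, inputs):
        return self.deconv(inputs)

qat = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
model = Net()
qat.quantize(model)   # self.deconv becomes a QuantizedConv2DTranspose
out = model(x)

The second path is the one the new tests exercise: quantize() walks the model, and any layer whose type appears in quantizable_layer_type is swapped for its fake-quant wrapper, so downstream export picks up the conv2d_transpose scales added elsewhere in this patch.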