From 40f54537256c4780593064191c1c1a3d5409d4cc Mon Sep 17 00:00:00 2001 From: huangxu96 <46740794+huangxu96@users.noreply.github.com> Date: Wed, 25 Nov 2020 11:46:30 +0800 Subject: [PATCH] Quant nn2.0 (#28764) * Impelement 2.0 API version Conv2d and Linear layer quantization in imperative mode. * use cudnn softmax in static Lenet * Modified ChannelwiseQAT Unittest for 2.0 API. * For CI python coverage. --- .../slim/quantization/imperative/qat.py | 7 +- .../slim/quantization/imperative/quant_nn.py | 117 ++++-------------- .../contrib/slim/tests/test_imperative_qat.py | 66 +++++----- .../tests/test_imperative_qat_channelwise.py | 54 ++++---- 4 files changed, 94 insertions(+), 150 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 7364655107b..bcd2ad2b1fa 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -20,7 +20,8 @@ import paddle from paddle.fluid import dygraph, core, framework from paddle.fluid.executor import Executor from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX -from paddle.fluid.dygraph.nn import Conv2D, Linear, BatchNorm, Pool2D, Conv2DTranspose +from paddle.nn import Linear, Conv2D +from paddle.fluid.dygraph.nn import BatchNorm, Pool2D, Conv2DTranspose from paddle.fluid.io import load_inference_model, save_inference_model from paddle.nn.layer.activation import ReLU, LeakyReLU, Sigmoid, ReLU6, Tanh, Softmax, PReLU from paddle.fluid.log_helper import get_logger @@ -142,6 +143,8 @@ class ImperativeQuantAware(object): self._weight_bits = weight_bits self._activation_bits = activation_bits self._moving_rate = moving_rate + self._activation_quantize_type = activation_quantize_type + self._weight_quantize_type = weight_quantize_type self._weight_pre_layer = weight_preprocess_layer self._act_pre_layer = act_preprocess_layer @@ -172,8 +175,6 @@ class ImperativeQuantAware(object): "Unknown weight_quantize_type: '%s'. It can only be " "'abs_max' or 'moving_average_abs_max' or 'channel_wise_abs_max' now." 
% (str(weight_quantize_type))) - self._activation_quantize_type = activation_quantize_type - self._weight_quantize_type = weight_quantize_type self._quant_layers_map = {'Conv2D': Conv2D, 'Linear': Linear} self._quantizable_layer_type = tuple( diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index 5acc4c30bc0..3b3e0abf45c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -21,6 +21,7 @@ from paddle.fluid.framework import _varbase_creator from paddle.fluid.framework import in_dygraph_mode from paddle.fluid.initializer import Constant from paddle.fluid.data_feeder import check_variable_and_dtype +from paddle.nn import functional as F __all__ = [ 'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D', @@ -144,7 +145,6 @@ class FakeQuantAbsMax(layers.Layer): quant_on_weight=False): super(FakeQuantAbsMax, self).__init__() self._quant_bits = quant_bits - self._dtype = dtype self._name = name scale_prefix = "{}.scale".format( name) if name else 'quant_dequant.scale' @@ -342,16 +342,17 @@ class QuantizedConv2D(layers.Layer): self._groups = getattr(layer, '_groups') self._stride = getattr(layer, '_stride') self._padding = getattr(layer, '_padding') + self._padding_mode = getattr(layer, '_padding_mode') + if self._padding_mode != 'zeros': + self._reversed_padding_repeated_twice = getattr( + layer, '_reversed_padding_repeated_twice') self._dilation = getattr(layer, '_dilation') - self._act = getattr(layer, '_act') - self._use_cudnn = getattr(layer, '_use_cudnn') - self._dtype = getattr(layer, '_dtype') - self._l_type = getattr(layer, '_l_type') + self._data_format = getattr(layer, '_data_format') self.weight = getattr(layer, 'weight') self.bias = getattr(layer, 'bias') + # For FakeQuant self._conv2d_quant_axis = 0 - if weight_quant_layer is not None: self._fake_quant_weight = weight_quant_layer() else: @@ -390,52 +391,22 @@ class QuantizedConv2D(layers.Layer): weight = self._weight_preprocess(self.weight) quant_weight = self._fake_quant_weight(weight) - if in_dygraph_mode() and self._l_type == 'conv2d': - attrs = ('strides', self._stride, 'paddings', self._padding, - 'dilations', self._dilation, 'groups', self._groups - if self._groups else 1, 'use_cudnn', self._use_cudnn) - pre_bias = core.ops.conv2d(quant_input, quant_weight, *attrs) - - pre_act = dygraph_utils._append_bias_in_dygraph(pre_bias, self.bias, - 1) - return dygraph_utils._append_activation_in_dygraph(pre_act, - self._act) - check_variable_and_dtype(quant_input, 'input', - ['float16', 'float32', 'float64'], - 'QuantizedConv2D') - attrs = { - 'strides': self._stride, - 'paddings': self._padding, - 'dilations': self._dilation, - 'groups': self._groups if self._groups else 1, - 'use_cudnn': self._use_cudnn, - 'use_mkldnn': False, - } - pre_bias = self._helper.create_variable_for_type_inference( - dtype=self._dtype) - - self._helper.append_op( - type=self._l_type, - inputs={ - 'Input': quant_input, - 'Filter': quant_weight, - }, - outputs={"Output": pre_bias}, - attrs=attrs) - - if self.bias is not None: - pre_act = self._helper.create_variable_for_type_inference( - dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [pre_bias], - 'Y': [self.bias]}, - outputs={'Out': [pre_act]}, - attrs={'axis': 1}) - else: - pre_act = pre_bias + if self._padding_mode != 'zeros': + quant_input = 
F.pad(quant_input, + self._reversed_padding_repeated_twice, + mode=self._padding_mode, + data_format=self._data_format) + self._padding = 0 - return self._helper.append_activation(pre_act, act=self._act) + return F.conv2d( + quant_input, + quant_weight, + bias=self.bias, + padding=self._padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) class QuantizedLinear(layers.Layer): @@ -457,10 +428,9 @@ class QuantizedLinear(layers.Layer): act_quant_layer=None): super(QuantizedLinear, self).__init__() # For Linear - self._act = getattr(layer, '_act') - self._dtype = getattr(layer, '_dtype') self.weight = getattr(layer, 'weight') self.bias = getattr(layer, 'bias') + self.name = getattr(layer, 'name') # For FakeQuant self._linear_quant_axis = 1 @@ -503,44 +473,9 @@ class QuantizedLinear(layers.Layer): weight = self._weight_preprocess(self.weight) quant_weight = self._fake_quant_weight(weight) - if in_dygraph_mode(): - pre_bias = _varbase_creator(dtype=input.dtype) - core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X', - False, 'transpose_Y', False, "alpha", 1) - pre_act = dygraph_utils._append_bias_in_dygraph( - pre_bias, self.bias, axis=len(input.shape) - 1) - - return dygraph_utils._append_activation_in_dygraph(pre_act, - self._act) - - check_variable_and_dtype(input, 'input', - ['float16', 'float32', 'float64'], - "QuantizedLinear") - attrs = { - "transpose_X": False, - "transpose_Y": False, - "alpha": 1, - } - inputs = {"X": [quant_input], "Y": [quant_weight]} - mul_out = self._helper.create_variable_for_type_inference(self._dtype) - - self._helper.append_op( - type="matmul", - inputs=inputs, - outputs={"Out": [mul_out]}, - attrs=attrs) - if self.bias is not None: - pre_activation = self._helper.create_variable_for_type_inference( - dtype=self._dtype) - self._helper.append_op( - type='elementwise_add', - inputs={'X': [mul_out], - 'Y': [self.bias]}, - outputs={'Out': [pre_activation]}, - attrs={'axis': len(input.shape) - 1}) - else: - pre_activation = mul_out - return self._helper.append_activation(pre_activation, act=self._act) + out = F.linear( + x=quant_input, weight=quant_weight, bias=self.bias, name=self.name) + return out class MovingAverageAbsMaxScale(layers.Layer): diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index eb924e13a7e..96b3b67103b 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -27,11 +27,11 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.dygraph.container import Sequential -from paddle.fluid.dygraph.nn import Conv2D +from paddle.nn import Linear, Conv2D, Softmax from paddle.fluid.dygraph.nn import Pool2D -from paddle.fluid.dygraph.nn import Linear from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedConv2D paddle.enable_static() @@ -43,7 +43,7 @@ _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -def StaticLenet(data, num_classes=10, classifier_activation='softmax'): +def StaticLenet(data, num_classes=10): conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") 
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") fc_w1_attr = fluid.ParamAttr(name="fc_w_1") @@ -85,15 +85,15 @@ def StaticLenet(data, num_classes=10, classifier_activation='softmax'): bias_attr=fc_b2_attr) fc3 = fluid.layers.fc(input=fc2, size=num_classes, - act=classifier_activation, param_attr=fc_w3_attr, bias_attr=fc_b3_attr) + fc4 = fluid.layers.softmax(fc3, use_cudnn=True) - return fc3 + return fc4 class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10, classifier_activation='softmax'): + def __init__(self, num_classes=10): super(ImperativeLenet, self).__init__() conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") @@ -107,47 +107,46 @@ class ImperativeLenet(fluid.dygraph.Layer): fc_b3_attr = fluid.ParamAttr(name="fc_b_3") self.features = Sequential( Conv2D( - num_channels=1, - num_filters=6, - filter_size=3, + in_channels=1, + out_channels=6, + kernel_size=3, stride=1, padding=1, - param_attr=conv2d_w1_attr, + weight_attr=conv2d_w1_attr, bias_attr=conv2d_b1_attr), Pool2D( pool_size=2, pool_type='max', pool_stride=2), Conv2D( - num_channels=6, - num_filters=16, - filter_size=5, + in_channels=6, + out_channels=16, + kernel_size=5, stride=1, padding=0, - param_attr=conv2d_w2_attr, + weight_attr=conv2d_w2_attr, bias_attr=conv2d_b2_attr), Pool2D( pool_size=2, pool_type='max', pool_stride=2)) self.fc = Sequential( Linear( - input_dim=400, - output_dim=120, - param_attr=fc_w1_attr, + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, bias_attr=fc_b1_attr), Linear( - input_dim=120, - output_dim=84, - param_attr=fc_w2_attr, + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, bias_attr=fc_b2_attr), Linear( - input_dim=84, - output_dim=num_classes, - act=classifier_activation, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr)) + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr), + Softmax()) def forward(self, inputs): x = self.features(inputs) - x = fluid.layers.flatten(x, 1) x = self.fc(x) return x @@ -162,8 +161,19 @@ class TestImperativeQat(unittest.TestCase): imperative_qat = ImperativeQuantAware( weight_quantize_type='abs_max', activation_quantize_type='moving_average_abs_max') - with fluid.dygraph.guard(): + # For CI coverage + conv1 = Conv2D( + in_channels=3, + out_channels=2, + kernel_size=3, + stride=1, + padding=1, + padding_mode='replicate') + quant_conv1 = QuantizedConv2D(conv1) + data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') + quant_conv1(fluid.dygraph.to_variable(data)) + lenet = ImperativeLenet() imperative_qat.quantize(lenet) adam = AdamOptimizer( @@ -286,7 +296,7 @@ class TestImperativeQat(unittest.TestCase): activation_quant_type = 'moving_average_abs_max' param_init_map = {} seed = 1000 - lr = 0.1 + lr = 0.01 # imperative train _logger.info( diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py index ddf37a0ebf8..caa9ea5b4d7 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -27,9 +27,8 @@ from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.dygraph.container import Sequential -from paddle.fluid.dygraph.nn import Conv2D +from 
paddle.nn import Linear, Conv2D, Softmax from paddle.fluid.dygraph.nn import Pool2D -from paddle.fluid.dygraph.nn import Linear from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX @@ -43,7 +42,7 @@ _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -def StaticLenet(data, num_classes=10, classifier_activation='softmax'): +def StaticLenet(data, num_classes=10): conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") fc_w1_attr = fluid.ParamAttr(name="fc_w_1") @@ -85,15 +84,15 @@ def StaticLenet(data, num_classes=10, classifier_activation='softmax'): bias_attr=fc_b2_attr) fc3 = fluid.layers.fc(input=fc2, size=num_classes, - act=classifier_activation, param_attr=fc_w3_attr, bias_attr=fc_b3_attr) + fc4 = fluid.layers.softmax(fc3, use_cudnn=True) - return fc3 + return fc4 class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10, classifier_activation='softmax'): + def __init__(self, num_classes=10): super(ImperativeLenet, self).__init__() conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") @@ -107,53 +106,52 @@ class ImperativeLenet(fluid.dygraph.Layer): fc_b3_attr = fluid.ParamAttr(name="fc_b_3") self.features = Sequential( Conv2D( - num_channels=1, - num_filters=6, - filter_size=3, + in_channels=1, + out_channels=6, + kernel_size=3, stride=1, padding=1, - param_attr=conv2d_w1_attr, + weight_attr=conv2d_w1_attr, bias_attr=conv2d_b1_attr), Pool2D( pool_size=2, pool_type='max', pool_stride=2), Conv2D( - num_channels=6, - num_filters=16, - filter_size=5, + in_channels=6, + out_channels=16, + kernel_size=5, stride=1, padding=0, - param_attr=conv2d_w2_attr, + weight_attr=conv2d_w2_attr, bias_attr=conv2d_b2_attr), Pool2D( pool_size=2, pool_type='max', pool_stride=2)) self.fc = Sequential( Linear( - input_dim=400, - output_dim=120, - param_attr=fc_w1_attr, + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, bias_attr=fc_b1_attr), Linear( - input_dim=120, - output_dim=84, - param_attr=fc_w2_attr, + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, bias_attr=fc_b2_attr), Linear( - input_dim=84, - output_dim=num_classes, - act=classifier_activation, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr)) + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr), + Softmax()) def forward(self, inputs): x = self.features(inputs) - x = fluid.layers.flatten(x, 1) x = self.fc(x) return x -class TestImperativeQat(unittest.TestCase): +class TestImperativeQatChannelWise(unittest.TestCase): """ QAT = quantization-aware training """ @@ -286,7 +284,7 @@ class TestImperativeQat(unittest.TestCase): activation_quant_type = 'moving_average_abs_max' param_init_map = {} seed = 1000 - lr = 0.1 + lr = 0.001 # imperative train _logger.info( -- GitLab
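
Note on the new QuantizedConv2D.forward: the old core.ops / append_op code paths are replaced with the 2.0 functional API, where a non-'zeros' padding_mode is applied explicitly via F.pad and the convolution then runs through F.conv2d with padding=0. The standalone sketch below mirrors the coverage case added to test_imperative_qat.py in this patch; it assumes a Paddle build that contains this patch and uses the same internal import path as the test.

    import numpy as np
    import paddle
    from paddle.nn import Conv2D
    # Internal path, as imported by the updated unit test in this patch.
    from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedConv2D

    # A non-'zeros' padding_mode exercises the new F.pad branch in
    # QuantizedConv2D.forward; the convolution itself then runs with padding=0.
    conv = Conv2D(
        in_channels=3,
        out_channels=2,
        kernel_size=3,
        stride=1,
        padding=1,
        padding_mode='replicate')
    quant_conv = QuantizedConv2D(conv)

    x = paddle.to_tensor(
        np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32'))
    y = quant_conv(x)  # fake-quantizes input and weight, then calls F.conv2d
    print(y.shape)     # [10, 2, 32, 32]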
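
QuantizedLinear.forward is simplified in the same way: the matmul / elementwise_add / append_activation sequence becomes a single F.linear call on the fake-quantized input and weight. A corresponding sketch, under the same assumptions as above (the wrapped layer is a paddle.nn.Linear, as mapped by ImperativeQuantAware's _quant_layers_map):

    import numpy as np
    import paddle
    from paddle.nn import Linear
    from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedLinear

    # Wrap a 2.0-style Linear layer; weight, bias and name are taken from it.
    fc = Linear(in_features=400, out_features=120)
    quant_fc = QuantizedLinear(fc)

    x = paddle.to_tensor(np.random.uniform(-1, 1, [4, 400]).astype('float32'))
    y = quant_fc(x)  # fake-quantizes x and the weight, then calls F.linear
    print(y.shape)   # [4, 120]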