From 6bbb6e7f458bc9dae8c8af91a147f1faee7bde19 Mon Sep 17 00:00:00 2001 From: guofei <52460041+gfwm2013@users.noreply.github.com> Date: Wed, 14 Oct 2020 21:43:15 +0800 Subject: [PATCH] Implement the function of OutScaleForTraining/OutScaleForInference in dygraph (#26601) * Implement the function of OutScaleForTraining/OutScaleForInference in dygraph test=develop --- .../ir/shuffle_channel_detect_pass.cc | 2 + paddle/fluid/pybind/op_function_generator.cc | 3 + .../slim/quantization/imperative/qat.py | 229 ++++++++- .../slim/quantization/imperative/quant_nn.py | 78 ++- .../slim/tests/test_imperative_out_scale.py | 461 ++++++++++++++++++ .../test_moving_average_abs_max_scale_op.py | 83 ++++ 6 files changed, 850 insertions(+), 6 deletions(-) create mode 100644 python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py create mode 100644 python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index 92e995579fa..b9bd660043b 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + #include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index ee6e541c9e6..8288f1852c2 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -51,6 +51,7 @@ std::map> op_ins_map = { {"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}}, {"hierarchical_sigmoid", {"X", "W", "Label", "PathTable", "PathCode", "Bias"}}, + {"moving_average_abs_max_scale", {"X", "InAccum", "InState"}}, }; // NOTE(zhiqiu): Like op_ins_map.
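This `op_ins_map` entry, together with the `op_outs_map` and `op_passing_outs_map` entries added just below, is what exposes the C++ `moving_average_abs_max_scale` op to the imperative API: the generator emits a `core.ops.moving_average_abs_max_scale` binding that takes X, InAccum and InState as inputs and expects the caller to pass in the OutScale, OutAccum and OutState buffers that the op updates in place. A minimal sketch of calling that binding directly, mirroring the call made by `MovingAverageAbsMaxScale.forward` later in this patch (the input shape, the 0.9 moving rate and the buffer initialization are illustrative assumptions, not part of this hunk):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid import core

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.random.rand(4, 16).astype('float32'))
        # 1-element buffers initialized to 1, as MovingAverageAbsMaxScale creates them.
        scale = fluid.dygraph.to_variable(np.ones([1], dtype='float32'))
        state = fluid.dygraph.to_variable(np.ones([1], dtype='float32'))
        accum = fluid.dygraph.to_variable(np.ones([1], dtype='float32'))
        # The leading arguments follow op_ins_map (X, InAccum, InState); the trailing
        # variables are the in-place outputs listed in op_passing_outs_map.
        out_scale, _, _ = core.ops.moving_average_abs_max_scale(
            x, accum, state, scale, state, accum,
            'moving_rate', 0.9, 'is_test', False)
        print(float(out_scale.numpy()))  # moving-average abs-max scale after one update
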
@@ -75,6 +76,7 @@ std::map> op_outs_map = { {"collect_fpn_proposals", {"FpnRois", "RoisNum"}}, {"distribute_fpn_proposals", {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}}, + {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}}, }; // NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are @@ -118,6 +120,7 @@ std::map> op_passing_outs_map = { {"check_finite_and_unscale", {"Out", "FoundInfinite"}}, {"update_loss_scaling", {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}}, + {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}}, }; // clang-format off diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 8d7ebcf4caa..7fc177e7ad7 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -15,18 +15,37 @@ import logging import numpy as np import sys +import os import paddle -from paddle.fluid import dygraph -from paddle.fluid.dygraph.nn import Conv2D -from paddle.fluid.dygraph.nn import Linear +from paddle.fluid import dygraph, core, framework +from paddle.fluid.executor import Executor +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.fluid.dygraph.nn import Conv2D, Linear, BatchNorm, Pool2D, Conv2DTranspose +from paddle.fluid.io import load_inference_model, save_inference_model +from paddle.nn.layer.activation import ReLU, LeakyReLU, Sigmoid, ReLU6, Tanh, Softmax, PReLU from paddle.fluid.log_helper import get_logger from . import quant_nn -__all__ = ['ImperativeQuantAware'] +__all__ = ['ImperativeQuantAware', 'ImperativeCalcOutScale'] _logger = get_logger( __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') +_op_real_in_out_name = { + "conv2d": [["Input", "Filter"], ["Output"]], + "conv2d_transpose": [["Input", "Filter"], ["Output"]], + "pool2d": [["X"], ["Out"]], + "elementwise_add": [["X", "Y"], ["Out"]], + "softmax": [["X"], ["Out"]], + "relu": [["X"], ["Out"]], + "relu6": [["X"], ["Out"]], + "leaky_relu": [["X"], ["Out"]], + "prelu": [["X"], ["Out"]], + "tanh": [["X"], ["Out"]], + "batch_norm": [["X"], ["Y"]], + "sigmoid": [["X"], ["Out"]], +} + class ImperativeQuantAware(object): """ @@ -141,7 +160,6 @@ class ImperativeQuantAware(object): for name, layer in model.named_sublayers(): if not isinstance(layer, self._quantizable_layer_type): continue - scopes = name.split('.') target = scopes[-1] obj = model @@ -173,3 +191,204 @@ class ImperativeQuantAware(object): layer, self._weight_bits, self._activation_bits, self._moving_rate, self._weight_quantize_type, self._activation_quantize_type) return quantized_layer + + +class ImperativeCalcOutScale(object): + def __init__(self, + moving_rate=0.9, + target_layer_types=[ + 'BatchNorm', 'Conv2D', 'Conv2DTranspose', 'LeakyReLU', + 'Linear', 'PReLU', 'Pool2D', 'ReLU', 'ReLU6', 'Sigmoid', + 'Softmax', 'Tanh' + ]): + """ + Add the logic of calculating and setting output quantization scales of some layers. + These output quantization scales may be used by tensorRT or some other inference engines. + + Args: + moving_rate(float): The decay coefficient of moving average. The default value is 0.9. + quantizable_op_type(list[str]): List the type of layers that will be calculated out_scale. 
+ Default is ['Conv2D', 'ReLU', 'PReLU', 'LeakyReLU', 'Linear', 'Sigmoid', 'BatchNorm', 'ReLU6', 'Tanh', 'Softmax', 'Conv2DTranspose'] + """ + super(ImperativeCalcOutScale, self).__init__() + self._moving_rate = moving_rate + self._out_scale_layers_map = { + 'BatchNorm': BatchNorm, + 'Conv2D': Conv2D, + 'Conv2DTranspose': Conv2DTranspose, + 'LeakyReLU': LeakyReLU, + 'Linear': Linear, + 'PReLU': PReLU, + 'Pool2D': Pool2D, + 'ReLU': ReLU, + 'ReLU6': ReLU6, + 'Sigmoid': Sigmoid, + 'Softmax': Softmax, + 'Tanh': Tanh + } + self._out_scale_layer_type = tuple( + self._out_scale_layers_map[layer] + if layer in self._out_scale_layers_map else layer + for layer in target_layer_types) + for layer in self._out_scale_layer_type: + assert not isinstance( + layer, str), "{} is unspported to be out_scaled.".format(layer) + self._register_hook_handle_list = [] + self._out_scale_dict = {} + + def calc_out_scale(self, model): + """ + Insert the `moving_average_abs_max_scale` op to calculate output scale of Specific layers in model. + + Args: + model(fluid.dygraph.Layer): The target model which would be calculate the output quantization scale. + + Returns: + None + """ + assert isinstance( + model, dygraph.Layer), "model must be the instance of dygraph.Layer" + for _, layer in model.named_sublayers(): + if not isinstance(layer, self._out_scale_layer_type): + continue + forward_post_hook_handle = layer.register_forward_post_hook( + self._forward_post_hook) + self._register_hook_handle_list.append(forward_post_hook_handle) + + # Get the output var name of the op + def _get_op_output_names(self, op): + assert isinstance( + op, framework.Operator), "The input op should be Operator." + var_names = [] + name_list = _op_real_in_out_name[op.type][1] + for name in name_list: + var_name = op.output(name) + if isinstance(var_name, list): + var_names.extend(var_name) + else: + var_names.append(var_name) + return var_names + + def save_quantized_model(self, layer, path, input_spec=None, **config): + """ + Save the quantized model for the inference. + + Args: + layer (Layer): The Layer to be saved. + path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``. + input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward + method, which can be described by InputSpec or example Tensor. If None, all input variables of + the original Layer's forward method would be the inputs of the saved model. Default None. + **configs (dict, optional): Other save configuration options for compatibility. We do not + recommend using these configurations, they may be removed in the future. If not necessary, + DO NOT use them. Default None. + The following options are currently supported: + (1) output_spec (list[Tensor]): Selects the output targets of the saved model. + By default, all return variables of original Layer's forward method are kept as the + output of the saved model. If the provided ``output_spec`` list is not all output variables, + the saved model will be pruned according to the given ``output_spec`` list. 
+ + Returns: + None + """ + + assert isinstance( + layer, dygraph.Layer), "model must be the instance of dygraph.Layer" + with dygraph.guard(): + layer.eval() + for handle in self._register_hook_handle_list: + handle.remove() + for key in self._out_scale_dict: + self._out_scale_dict[key] = float(self._out_scale_dict[key] + .numpy()) + + paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config) + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = Executor(place) + + file_prefix = os.path.basename(path) + dirname = os.path.dirname(path) + model_filename = file_prefix + INFER_MODEL_SUFFIX + params_filename = file_prefix + INFER_PARAMS_SUFFIX + + [inference_program, feed_target_names, fetch_targets] = ( + load_inference_model( + dirname=dirname, + executor=exe, + model_filename=model_filename, + params_filename=params_filename)) + + # Traverse all ops in the program and find out the op matching + # the Layer in the dynamic graph. + layer_var_dict = {} + for block in inference_program.blocks: + for op in block.ops: + if op.type in _op_real_in_out_name: + output_var_names = self._get_op_output_names(op) + for output_var_name in output_var_names: + output_var_tensor = block.var(output_var_name) + if output_var_tensor.dtype not in [ + core.VarDesc.VarType.FP64, + core.VarDesc.VarType.FP32 + ]: + continue + # Because the Layer in dygraph may correspond to multiple ops + # in static program after being saved. To ensure correctness, + # the outscale collected for output of dygraph Layer can only + # be set to the last op in the corresponding ops in static program. + # + # We can judge the execution order of the ops which corresponding + # to dygraph Layer by the name of output. And use dict to save + # the corresponding relationship between the dygraph Layer and the + # static graph op that needs to set the outscale attribute. + dynamic_layer_name, var_name_suffix = output_var_name.split( + ".") + if dynamic_layer_name in layer_var_dict: + if layer_var_dict[dynamic_layer_name][ + 0] < var_name_suffix: + layer_var_dict[dynamic_layer_name] = [ + var_name_suffix, op + ] + else: + layer_var_dict[ + dynamic_layer_name] = [var_name_suffix, op] + + # Because the naming styles of static and dynamic graph are different, + # in order to avoid mistakes, we unify the name here. + for (layer_name, var_name_op_list) in layer_var_dict.items(): + if 'prelu' in layer_name: + layer_name = layer_name.replace('prelu', 'p_re_lu') + if 'relu' in layer_name: + layer_name = layer_name.replace('relu', 're_lu') + if layer_name not in self._out_scale_dict: + continue + var_name_op_list[1]._set_attr('out_threshold', + self._out_scale_dict[layer_name]) + + # Save the processed program. + save_inference_model( + dirname=dirname, + feeded_var_names=feed_target_names, + target_vars=fetch_targets, + executor=exe, + main_program=inference_program.clone(), + model_filename=model_filename, + params_filename=params_filename) + + def _forward_post_hook(self, layer, input, output): + assert isinstance( + output, core.VarBase + ), "Multiple outputs are not currently supported in ImperativeOutScale." 
+ if output.dtype not in [ + core.VarDesc.VarType.FP32, core.VarDesc.VarType.FP64 + ]: + return + if not hasattr(layer, "_out_scale"): + layer._out_scale = quant_nn.MovingAverageAbsMaxScale( + output.name, self._moving_rate, output.dtype) + scale_out = layer._out_scale(output) + self._out_scale_dict[layer.full_name()] = scale_out diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index 2e35ac288c7..bbaae56439e 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -24,7 +24,8 @@ from paddle.fluid.data_feeder import check_variable_and_dtype __all__ = [ 'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D', - 'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax' + 'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax', + 'MovingAverageAbsMaxScale' ] @@ -494,3 +495,78 @@ class QuantizedLinear(layers.Layer): else: pre_activation = mul_out return self._helper.append_activation(pre_activation, act=self._act) + + +class MovingAverageAbsMaxScale(layers.Layer): + def __init__(self, name=None, moving_rate=0.9, dtype='float32'): + """ + MovingAverageMaxScale layer is used to calculating the output quantization scale of Layer. + Its computational formula is described as below: + + :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)` + :math:`Out = X` + """ + super(MovingAverageAbsMaxScale, self).__init__() + self._moving_rate = moving_rate + self._dtype = dtype + + scale_prefix = '{}.scale'.format(name) if name else 'outscale.scale' + name = unique_name.generate(scale_prefix) + scale_attr = ParamAttr( + name=name, initializer=Constant(1), trainable=False) + self._scale = self.create_parameter( + shape=[1], attr=scale_attr, dtype=self._dtype) + self._scale.stop_gradient = True + + state_prefix = "{}.state".format(name) if name else 'outscale.state' + state_attr = ParamAttr( + name=unique_name.generate(state_prefix), + initializer=Constant(1), + trainable=False) + self._state = self.create_parameter( + shape=[1], attr=state_attr, dtype=self._dtype) + self._state.stop_gradient = True + + accum_prefix = "{}.accum".format(name) if name else 'outscale.accum' + accum_attr = ParamAttr( + name=unique_name.generate(accum_prefix), + initializer=Constant(1), + trainable=False) + self._accum = self.create_parameter( + shape=[1], attr=accum_attr, dtype=self._dtype) + self._accum.stop_gradient = True + MovingAverageAbsMaxScale._has_create = True + + def forward(self, input): + if in_dygraph_mode(): + attrs = ('moving_rate', self._moving_rate, 'is_test', + not self.training) + state = self._state if self.training else None + accum = self._accum if self.training else None + + out_scale, _, _ = core.ops.moving_average_abs_max_scale( + input, accum, state, self._scale, state, accum, *attrs) + return out_scale + + check_variable_and_dtype(input, 'input', ['float32', 'float64'], + 'MovingAverageAbsMaxScale') + + scale_out = self._scale + attrs = {'moving_rate': self._moving_rate, 'is_test': not self.training} + + inputs = {"X": [input]} + outputs = {"OutScale": [scale_out]} + + if self.training: + inputs['InState'] = [self._state] + inputs['InAccum'] = [self._accum] + outputs['OutState'] = [self._state] + outputs['OutAccum'] = [self._accum] + + self._helper.append_op( + type="moving_average_abs_max_scale", + inputs=inputs, + outputs=outputs, + attrs=attrs) + + return scale_out diff --git 
a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py new file mode 100644 index 00000000000..3fc8352493d --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py @@ -0,0 +1,461 @@ +# copyright (c) 2018 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +from __future__ import print_function + +import os +import numpy as np +import random +import unittest +import logging +import paddle +import paddle.fluid as fluid +import paddle.fluid.layers as layers +from paddle.fluid import core +from paddle.fluid.optimizer import AdamOptimizer +from paddle.fluid.framework import IrGraph +from paddle.fluid.contrib.slim.quantization import ImperativeCalcOutScale +from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass +from paddle.fluid.dygraph.container import Sequential +from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX +from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6 +from paddle.fluid.dygraph.nn import BatchNorm, Conv2D, Linear, Pool2D +from paddle.fluid.log_helper import get_logger + +paddle.enable_static() + +os.environ["CPU_NUM"] = "1" +if core.is_compiled_with_cuda(): + fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +def StaticLenet(data, num_classes=10, classifier_activation='softmax'): + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + conv1 = fluid.layers.conv2d( + data, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr) + batch_norm1 = layers.batch_norm(conv1) + relu1 = layers.relu(batch_norm1) + pool1 = fluid.layers.pool2d( + relu1, pool_size=2, pool_type='max', pool_stride=2) + conv2 = fluid.layers.conv2d( + pool1, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) + batch_norm2 = layers.batch_norm(conv2) + relu6_1 = layers.relu6(batch_norm2) + pool2 = fluid.layers.pool2d( + relu6_1, pool_size=2, pool_type='max', pool_stride=2) + + fc1 = fluid.layers.fc(input=pool2, + size=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr) + leaky_relu1 = layers.leaky_relu(fc1, alpha=0.01) + fc2 = fluid.layers.fc(input=leaky_relu1, + size=84, + param_attr=fc_w2_attr, + bias_attr=fc_b2_attr) + sigmoid1 = layers.sigmoid(fc2) + fc3 = fluid.layers.fc(input=sigmoid1, + size=num_classes, + 
act=classifier_activation, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr) + return fc3 + + +class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10, classifier_activation='softmax'): + super(ImperativeLenet, self).__init__() + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.features = Sequential( + Conv2D( + num_channels=1, + num_filters=6, + filter_size=3, + stride=1, + padding=1, + param_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr), + BatchNorm(6), + ReLU(), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2), + Conv2D( + num_channels=6, + num_filters=16, + filter_size=5, + stride=1, + padding=0, + param_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), + BatchNorm(16), + ReLU6(), + Pool2D( + pool_size=2, pool_type='max', pool_stride=2)) + + self.fc = Sequential( + Linear( + input_dim=400, + output_dim=120, + param_attr=fc_w1_attr, + bias_attr=fc_b1_attr), + LeakyReLU(), + Linear( + input_dim=120, + output_dim=84, + param_attr=fc_w2_attr, + bias_attr=fc_b2_attr), + Sigmoid(), + Linear( + input_dim=84, + act=classifier_activation, + output_dim=num_classes, + param_attr=fc_w3_attr, + bias_attr=fc_b3_attr)) + + def forward(self, inputs): + x = self.features(inputs) + + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + +class TestImperativeOutSclae(unittest.TestCase): + def test_calc_out_scale_save(self): + imperative_out_scale = ImperativeCalcOutScale() + + with fluid.dygraph.guard(): + lenet = ImperativeLenet() + adam = AdamOptimizer( + learning_rate=0.001, parameter_list=lenet.parameters()) + train_reader = paddle.batch( + paddle.dataset.mnist.train(), batch_size=32, drop_last=True) + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32) + imperative_out_scale.calc_out_scale(lenet) + epoch_num = 1 + for epoch in range(epoch_num): + lenet.train() + for batch_id, data in enumerate(train_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + out = lenet(img) + acc = fluid.layers.accuracy(out, label) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + if batch_id % 100 == 0: + _logger.info( + "Train | At epoch {} step {}: loss = {:}, acc= {:}". + format(epoch, batch_id, + avg_loss.numpy(), acc.numpy())) + lenet.eval() + for batch_id, data in enumerate(test_reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + acc_top1 = fluid.layers.accuracy( + input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy( + input=out, label=label, k=5) + + if batch_id % 100 == 0: + _logger.info( + "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". 
+ format(epoch, batch_id, + acc_top1.numpy(), acc_top5.numpy())) + + # save weights + model_dict = lenet.state_dict() + fluid.save_dygraph(model_dict, "save_temp") + + # test the correctness of `save_quantized_model` + data = next(test_reader()) + test_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + test_img = fluid.dygraph.to_variable(test_data) + lenet.eval() + before_save = lenet(test_img) + + # save inference quantized model + path = "./outscale_infer_model/lenet" + save_dir = "./outscale_infer_model" + imperative_out_scale.save_quantized_model( + layer=lenet, + path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + [inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=save_dir, + executor=exe, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX)) + after_save, = exe.run(inference_program, + feed={feed_target_names[0]: test_data}, + fetch_list=fetch_targets) + + self.assertTrue( + np.allclose(after_save, before_save.numpy()), + msg='Failed to save the inference quantized model.') + + def test_out_scale_acc(self): + def _build_static_lenet(main, startup, is_test=False, seed=1000): + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + main.random_seed = seed + startup.random_seed = seed + img = fluid.layers.data( + name='image', shape=[1, 28, 28], dtype='float32') + label = fluid.layers.data( + name='label', shape=[1], dtype='int64') + prediction = StaticLenet(img) + if not is_test: + loss = fluid.layers.cross_entropy( + input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + else: + avg_loss = prediction + return img, label, avg_loss + + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + param_init_map = {} + seed = 1000 + lr = 0.1 + dynamic_out_scale_list = [] + static_out_scale_list = [] + + # imperative train + _logger.info( + "--------------------------dynamic graph qat--------------------------" + ) + imperative_out_scale = ImperativeCalcOutScale() + + with fluid.dygraph.guard(): + np.random.seed(seed) + fluid.default_main_program().random_seed = seed + fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() + fixed_state = {} + for name, param in lenet.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( + p_shape).astype('float32') + fixed_state[name] = value + param_init_map[param.name] = value + lenet.set_dict(fixed_state) + imperative_out_scale.calc_out_scale(lenet) + adam = AdamOptimizer( + learning_rate=lr, parameter_list=lenet.parameters()) + dynamic_loss_rec = [] + lenet.train() + for batch_id, data in enumerate(reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array( + [x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = fluid.dygraph.to_variable(x_data) + label = fluid.dygraph.to_variable(y_data) + + out = lenet(img) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + adam.minimize(avg_loss) + lenet.clear_gradients() + dynamic_loss_rec.append(avg_loss.numpy()[0]) + 
if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + + lenet.eval() + op_object_list = (Conv2D, ReLU, ReLU6, LeakyReLU, Sigmoid, Pool2D, + BatchNorm) + + path = "./dynamic_outscale_infer_model/lenet" + save_dir = "./dynamic_outscale_infer_model" + + imperative_out_scale.save_quantized_model( + layer=lenet, + path=path, + input_spec=[ + paddle.static.InputSpec( + shape=[None, 1, 28, 28], dtype='float32') + ]) + + _logger.info( + "--------------------------static graph qat--------------------------" + ) + static_loss_rec = [] + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + exe = fluid.Executor(place) + + main = fluid.Program() + infer = fluid.Program() + startup = fluid.Program() + static_img, static_label, static_loss = _build_static_lenet( + main, startup, False, seed) + infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, + seed) + with fluid.unique_name.guard(): + with fluid.program_guard(main, startup): + opt = AdamOptimizer(learning_rate=lr) + opt.minimize(static_loss) + + scope = core.Scope() + with fluid.scope_guard(scope): + exe.run(startup) + for param in main.all_parameters(): + param_tensor = scope.var(param.name).get_tensor() + param_tensor.set(param_init_map[param.name], place) + main_graph = IrGraph(core.Graph(main.desc), for_test=False) + infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) + transform_pass = OutScaleForTrainingPass(scope=scope, place=place) + transform_pass.apply(main_graph) + build_strategy = fluid.BuildStrategy() + build_strategy.fuse_all_reduce_ops = False + binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( + loss_name=static_loss.name, build_strategy=build_strategy) + + feeder = fluid.DataFeeder( + feed_list=[static_img, static_label], place=place) + with fluid.scope_guard(scope): + for batch_id, data in enumerate(reader()): + loss_v, = exe.run(binary, + feed=feeder.feed(data), + fetch_list=[static_loss]) + static_loss_rec.append(loss_v[0]) + if batch_id % 100 == 0: + _logger.info('{}: {}'.format('loss', loss_v)) + scale_inference_pass = OutScaleForInferencePass(scope=scope) + scale_inference_pass.apply(infer_graph) + + out_scale_op_list = [ + "batch_norm", "conv2d", "leaky_relu", "pool2d", "relu6", "relu", + "sigmoid", "tanh", "relu6", "softmax", "conv2d_transpose", + "elementwise_add" + ] + op_nodes = infer_graph.all_op_nodes() + for op_node in op_nodes: + if op_node.name() in out_scale_op_list: + static_out_scale_list.append(op_node.op().attr("out_threshold")) + + save_program = infer_graph.to_program() + with fluid.scope_guard(scope): + fluid.io.save_inference_model("./static_mnist", [infer_img.name], + [infer_pre], exe, save_program) + rtol = 1e-05 + atol = 1e-08 + for i, (loss_d, + loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): + diff = np.abs(loss_d - loss_s) + if diff > (atol + rtol * np.abs(loss_s)): + _logger.info( + "diff({}) at {}, dynamic loss = {}, static loss = {}". 
+ format(diff, i, loss_d, loss_s)) + break + + self.assertTrue( + np.allclose( + np.array(dynamic_loss_rec), + np.array(static_loss_rec), + rtol=rtol, + atol=atol, + equal_nan=True), + msg='Failed to do the imperative qat.') + # load dynamic model + [inference_program, feed_target_names, fetch_targets] = ( + fluid.io.load_inference_model( + dirname=save_dir, + executor=exe, + model_filename="lenet" + INFER_MODEL_SUFFIX, + params_filename="lenet" + INFER_PARAMS_SUFFIX)) + + global_block = inference_program.global_block() + for op in global_block.ops: + if op.has_attr('out_threshold'): + dynamic_out_scale_list.append(op.attr('out_threshold')) + + check_list = [ + False for item in dynamic_out_scale_list + if item not in static_out_scale_list + ] + self.assertTrue(len(check_list) == 0) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py new file mode 100644 index 00000000000..c947eeb31fc --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/test_moving_average_abs_max_scale_op.py @@ -0,0 +1,83 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.contrib.slim.quantization.imperative import quant_nn + +paddle.enable_static() + + +def init_data(batch_size=32, img_shape=[784], label_range=9): + np.random.seed(5) + assert isinstance(img_shape, list) + input_shape = [batch_size] + img_shape + img = np.random.random(size=input_shape).astype(np.float32) + label = np.array( + [np.random.randint(0, label_range) for _ in range(batch_size)]).reshape( + (-1, 1)).astype("int64") + return img, label + + +class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): + def check_backward(self, use_cuda): + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + image = fluid.layers.data( + name='image', shape=[784], dtype='float32') + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + fc_tmp = fluid.layers.fc(image, size=10, act='softmax') + out_scale = quant_nn.MovingAverageAbsMaxScale( + name=fc_tmp.name, dtype=fc_tmp.dtype) + fc_tmp_1 = out_scale(fc_tmp) + cross_entropy = fluid.layers.softmax_with_cross_entropy(fc_tmp, + label) + loss = fluid.layers.reduce_mean(cross_entropy) + sgd = fluid.optimizer.SGD(learning_rate=1e-3) + sgd.minimize(loss) + + moving_average_abs_max_scale_ops = [ + op for op in main_program.blocks[0].ops + if op.type == u'moving_average_abs_max_scale' + ] + assert len( + moving_average_abs_max_scale_ops + ) == 1, "The number of moving_average_abs_max_scale_ops should be 1." 
+ + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + + binary = fluid.compiler.CompiledProgram( + main_program).with_data_parallel(loss_name=loss.name) + + img, label = init_data() + feed_dict = {"image": img, "label": label} + res = exe.run(binary, feed_dict) + + def test_fw_bw(self): + if core.is_compiled_with_cuda(): + self.check_backward(use_cuda=True) + self.check_backward(use_cuda=False) + + +if __name__ == '__main__': + unittest.main() -- GitLab
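Taken together, the pieces in this patch are meant to be used the way the new tests exercise them: hook a dygraph model with `ImperativeCalcOutScale.calc_out_scale`, run it so the forward-post hooks keep updating the per-layer moving-average scales, then call `save_quantized_model` to export an inference program whose matching ops carry the `out_threshold` attribute. A minimal end-to-end sketch under the same assumptions as the tests (fluid-era dygraph APIs with `paddle.enable_static()` at module level and MNIST-shaped random data); `TinyNet`, the loop length and the save path are illustrative and not part of the patch:

    import numpy as np
    import paddle
    import paddle.fluid as fluid
    from paddle.fluid.contrib.slim.quantization import ImperativeCalcOutScale
    from paddle.fluid.dygraph.nn import Conv2D, Pool2D
    from paddle.nn.layer import ReLU

    paddle.enable_static()

    class TinyNet(fluid.dygraph.Layer):
        # A made-up model built only from layer types in the default out-scale list.
        def __init__(self):
            super(TinyNet, self).__init__()
            self.conv = Conv2D(num_channels=1, num_filters=6, filter_size=3)
            self.relu = ReLU()
            self.pool = Pool2D(pool_size=2, pool_type='max', pool_stride=2)

        def forward(self, x):
            return self.pool(self.relu(self.conv(x)))

    calc_out_scale = ImperativeCalcOutScale(moving_rate=0.9)

    with fluid.dygraph.guard():
        model = TinyNet()
        calc_out_scale.calc_out_scale(model)   # register forward-post hooks
        model.train()
        for _ in range(10):
            x = fluid.dygraph.to_variable(
                np.random.rand(32, 1, 28, 28).astype('float32'))
            model(x)                           # each forward pass updates the scales

        # Saves the jit-translated model, then re-saves the inference program with
        # `out_threshold` set on the ops that correspond to the hooked layers.
        calc_out_scale.save_quantized_model(
            layer=model,
            path="./outscale_sketch/tinynet",
            input_spec=[
                paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
            ])

Loading the saved directory with `fluid.io.load_inference_model` and scanning the ops for `op.attr('out_threshold')`, as `test_imperative_out_scale.py` does above, is the simplest way to confirm the scales were attached.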