import numpy as np
import time
import sys
import logging
import paddle
import paddle.fluid as fluid
import six
import math
import copy
from ..dist import merge
from ..core.graph_wrapper import GraphWrapper
from ..common import get_logger
from paddle.fluid.contrib.slim.quantization import utils

_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')

GAMMA = -0.1
ZETA = 1.1

__all__ = ['RoundingOptimizer']


class RoundingOptimizerLoss(object):
    def __init__(self,
                 program,
                 weight_block_names=None,
                 round_loss_mode='relaxation',
                 rec_loss_mode='mse',
                 beta_mode='const',
                 weight=0.1):
        """
        The loss function of Rounding Optimizer.

        Args:
            program(Program): The student program.
            weight_block_names(list, optional): The weight names inside a block.
            round_loss_mode(str): The rounding loss function mode.
            rec_loss_mode(str): The reconstruction loss function mode.
            beta_mode(str): The parameter beta mode.
        Returns:
            total_loss(Variable): The sum of rounding loss and reconstruction loss.
            rec_loss(Variable): The reconstruction loss.
            round_loss(Variable): The rounding loss.
        """
        self.program = program
        self.round_loss_mode = round_loss_mode
        self.weight = weight
        self.rec_loss_mode = rec_loss_mode
        self.weight_block_names = weight_block_names
        self.beta_mode = beta_mode

    def compute_soft_rounding(self, alpha_v):
        return paddle.clip(
            paddle.nn.functional.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, 0,
            1)

    def get_loss(self, student_tensor, teacher_tensor, scheduler):
        if self.rec_loss_mode == 'mse':
            rec_loss = paddle.nn.functional.mse_loss(student_tensor,
                                                     teacher_tensor)
        else:
            raise ValueError(
                'Not supported reconstruction loss function: {}'.format(
                    self.rec_loss_mode))

        if self.beta_mode == 'const':
            self.beta = 3
        else:
            self.beta = scheduler.get_lr()

        if self.round_loss_mode == 'relaxation':
            round_loss = 0.0
            for name in self.weight_block_names:
                alpha_v = self.program.global_block().var(name + '.alpha')
                h_v = self.compute_soft_rounding(alpha_v)
                round_loss += self.weight * paddle.sum(
                    -paddle.pow(paddle.abs(2 * h_v - 1), self.beta) + 1)
        else:
            raise NotImplementedError

        total_loss = rec_loss + round_loss
        return total_loss, rec_loss, round_loss
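
# A minimal NumPy sketch of the relaxation computed by RoundingOptimizerLoss
# above (illustrative only; the helper name `_soft_round_np` is not part of
# this module): the learnable `alpha` is pushed through a stretched sigmoid
# and clipped to [0, 1], and the rounding loss drives every entry of h(alpha)
# toward exactly 0 or 1.
#
#     def _soft_round_np(alpha, beta=3):
#         h = np.clip(
#             1.0 / (1.0 + np.exp(-alpha)) * (ZETA - GAMMA) + GAMMA, 0.0, 1.0)
#         round_loss = np.sum(1.0 - np.abs(2.0 * h - 1.0)**beta)
#         return h, round_loss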


class RoundingOptimizer(object):
    def __init__(self,
                 data_loader,
                 fp32_program,
                 feed_list,
                 fetch_list,
                 exe,
                 scope,
                 place,
                 quantized_op_pairs,
                 weight_quantize_type,
                 scale_dict,
                 blocks,
                 block_weights_names,
                 round_type,
                 num_iterations=1000,
                 lr=0.1,
                 bias_correction=False,
                 epochs=20):
        '''
        Rounding Optimizer, used to optimize the rounding policy by
        reconstructing the intermediate output of each block.

        Args:
            data_loader(Python Generator, Paddle.io.DataLoader, optional): The
                Generator or DataLoader that provides calibration data; each
                call returns one batch.
            fp32_program(Program): The FP32 program whose weight rounding will
                be optimized.
            feed_list(list[str]): The feed variable names of the program.
            fetch_list(list): The fetch variables of the program.
            exe(fluid.Executor): The executor used to load, run and save the
                quantized model.
            scope(fluid.Scope, optional): The scope of the program, used to
                load and save variables. If scope=None, the scope is obtained
                by global_scope().
            place(CPUPlace()|CUDAPlace(N)): The device on which Paddle runs.
            quantized_op_pairs(dict, optional): Mapping from an op's weight
                name to its output var name, where the key of the dict is the
                weight name of the op and the value is the output var name of
                the op.
            weight_quantize_type(str): Quantization type for weights,
                supporting 'abs_max' and 'channel_wise_abs_max'. This param
                only specifies the fake ops used when saving the quantized
                model, and the scales obtained by post training quantization
                are saved in those fake ops. Compared to 'abs_max', model
                accuracy is usually higher with 'channel_wise_abs_max'.
            scale_dict(dict, optional): Mapping from a var's name to its quant
                scales, where the key of the dict is the var name and the
                value is the quant scales of the var.
            blocks(list[list], optional): A list of blocks, where each block is
                a subgraph of the fp32 program with exactly one input
                operation and one output operation.
            block_weights_names(list[list], optional): The weight names inside
                every block.
            round_type(str, optional): The rounding policy for converting the
                quantized weight values from float to int. Currently supports
                the ['adaround', 'brecq', 'qdrop'] methods. 'adaround' refers
                to https://arxiv.org/abs/2004.10568, 'brecq' refers to
                https://arxiv.org/pdf/2102.05426, and 'qdrop' refers to
                https://arxiv.org/pdf/2203.05740.
            num_iterations(int, optional): The maximum number of calibration
                batches used per epoch.
            lr(float, optional): The learning rate of Rounding Optimizer.
            bias_correction(bool, optional): If set to True, use the bias
                correction method of https://arxiv.org/abs/1810.05723.
                Default is False.
            epochs(int, optional): The number of reconstruction epochs per
                block.
        Returns:
            None
        '''
        assert round_type in ['adaround', 'brecq', 'qdrop']
        if round_type in ['brecq', 'qdrop']:
            assert blocks is not None, "The blocks cannot be None."
            assert block_weights_names is not None, \
                "The block_weights_names cannot be None."
        self._program = fp32_program
        self._data_loader = data_loader
        self._round_type = round_type
        self._feed_list = feed_list
        self._fetch_list = fetch_list
        self._exe = exe
        self._scope = scope
        self._place = place
        self._quantized_op_pairs = quantized_op_pairs
        self._weight_var_names = list(self._quantized_op_pairs.keys())
        self._weight_quantize_type = weight_quantize_type
        self._scale_dict = scale_dict
        self._num_iterations = num_iterations
        self._epochs = epochs
        self._lr = lr
        self._blocks = blocks
        self._block_weights_names = block_weights_names
        self._bias_correction = bias_correction
        if round_type in ['adaround']:
            blocks, block_weights_names = self._get_layers()
            self._blocks = blocks
            self._block_weights_names = block_weights_names

    def _get_layers(self):
        blocks = []
        block_weights_names = []
        persistable_var_names = self._all_persistable_var_names()
        self._input_weight_pairs = {}
        for block_id in range(len(self._program.blocks)):
            for op in self._program.blocks[block_id].ops:
                in_var_names = utils._get_op_input_var_names(op)
                for in_var_name in in_var_names:
                    if in_var_name in persistable_var_names:
                        in_var_names.remove(in_var_name)
                        self._input_weight_pairs[in_var_name] = in_var_names
                        break
        for name in self._weight_var_names:
            block_weights_names.append([name])
            block_ = []
            block_.append(self._input_weight_pairs[name][0])
            block_.append(self._quantized_op_pairs[name])
            blocks.append(block_)
        return blocks, block_weights_names

    def _preprocess(self):
        data_name_map = {}
        for name in self._feed_list:
            data_name_map[name] = name
        self._student_program = self._program.clone()
        merge(
            self._program,
            self._student_program,
            data_name_map,
            self._place,
            teacher_scope=None,
            name_prefix="teacher_",
            merge_feed=True)
        # Quantize the weights to the integer domain and keep only the floor;
        # the inserted soft-rounding sub-graph learns whether each value is
        # rounded up or down.
        for name in self._weight_var_names:
            weight_np = utils.load_variable_data(self._scope, name)
            scale = self._scale_dict[name]
            weight_np_floor = np.floor(utils.quant_tensor(weight_np, scale))
            utils.set_variable_data(self._scope, self._place, name,
                                    weight_np_floor)
        self._graph = GraphWrapper(self._student_program)
        if self._round_type == 'qdrop':
            self._insert_drop_quant_dequant()
        self._insert_soft_rounding()
        self._isolate_blocks()
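
    # _run() below solves one reconstruction problem per block: for block k it
    # clones the student program, builds
    #     total_loss = MSE(student_out_k, teacher_out_k) + weight * round_loss
    # via RoundingOptimizerLoss, and minimizes it with Adam so that the
    # inserted `.alpha` soft-rounding parameters are trained; the merged
    # teacher branch (variables prefixed with "teacher_") provides the
    # reconstruction target.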

    def _run(self):
        self._preprocess()
        startup_program = paddle.static.Program()
        for k in range(len(self._blocks)):
            block_ = self._blocks[k]
            names = self._block_weights_names[k]
            tmp_program = self._student_program.clone()
            quant_op_out_name = block_[1]
            with paddle.static.program_guard(tmp_program, startup_program):
                loss_function = RoundingOptimizerLoss(tmp_program, names)
                student_var = tmp_program.global_block().var(quant_op_out_name)
                teacher_var = tmp_program.global_block().var(
                    "teacher_" + quant_op_out_name)
                scheduler = paddle.optimizer.lr.CosineAnnealingDecay(
                    learning_rate=20, eta_min=2, T_max=2000, verbose=True)
                total_loss, recon_loss, round_loss = loss_function.get_loss(
                    student_var, teacher_var, scheduler)
                train_fetches_loss = {
                    "total_loss": total_loss,
                    "recon_loss": recon_loss,
                    "round_loss": round_loss
                }
                optimizer = paddle.optimizer.Adam(learning_rate=self._lr)
                optimizer.minimize(total_loss)
            self._exe.run(startup_program)
            start_time = time.time()
            prev_start_time = start_time
            for epoch in range(self._epochs):
                for i, data in enumerate(self._data_loader()):
                    prev_start_time = start_time
                    start_time = time.time()
                    out = self._exe.run(
                        tmp_program,
                        feed=data,
                        fetch_list=[
                            v.name for v in train_fetches_loss.values()
                        ],
                        return_numpy=True)
                    _logger.info(
                        "Iter {:d}, lr {}, total_loss {:.5f}, recon_loss {:.5f}, round_loss {:.5f}, time {:.5f}s"
                        .format(epoch, self._lr,
                                np.mean(out[0]),
                                np.mean(out[1]),
                                np.mean(out[2]), start_time - prev_start_time))
                    sys.stdout.flush()
                    if i == self._num_iterations:
                        break
        self._update_weights_to_int()
        if self._bias_correction:
            self._bias_correction_w()
        return self._program

    def _init_alpha(self, name, scale):
        _tensor = utils.load_variable_data(self._scope, "teacher_" + name)
        tensor_scaled = utils.quant_tensor(_tensor, scale)
        tensor_floor = np.floor(tensor_scaled)
        tensor = tensor_scaled - tensor_floor
        # Invert the stretched sigmoid so that sigmoid(alpha)*(ZETA-GAMMA)+GAMMA
        # initially reproduces the fractional part of the scaled weight.
        alpha = -np.log((ZETA - GAMMA) / (tensor - GAMMA) - 1)
        return alpha

    def _soft_rounding(self, weight, scale, weight_bits=8):
        """
        Define the network of soft rounding.

        Args:
            weight: The quantized weight with dtype=float32.
        """
        bnt = (1 << (weight_bits - 1)) - 1

        def _dequant(x, scale):
            s = (scale + 1e-8) / bnt
            dequant_x = s * x
            return dequant_x

        quantized_weight = paddle.static.data(
            shape=weight.shape, dtype=weight.dtype, name=weight.name + '_quant')

        v = paddle.static.create_parameter(
            shape=weight.shape,
            dtype=weight.dtype,
            name=weight.name + ".alpha",
            default_initializer=fluid.initializer.NumpyArrayInitializer(
                self._alpha))

        h_v = paddle.clip(
            paddle.nn.functional.sigmoid(v) * (ZETA - GAMMA) + GAMMA, 0, 1)

        if self._weight_quantize_type == 'channel_wise_abs_max':
            scale_var = paddle.static.create_parameter(
                dtype=weight.dtype,
                shape=weight.shape,
                name=weight.name + '.scale',
                default_initializer=fluid.initializer.NumpyArrayInitializer(
                    scale))
        else:
            scale_var = scale

        w = _dequant(quantized_weight + h_v, scale_var)
        return w

    def _insert_soft_rounding(self):
        for name in self._weight_var_names:
            weight = self._graph.var(name)
            scale = self._scale_dict[name]
            shape = weight.shape()
            self._alpha = self._init_alpha(name, scale)
            if self._weight_quantize_type == 'channel_wise_abs_max':
                # Broadcast the per-channel scales to the full weight shape.
                scale = np.array(scale)
                scale = scale.reshape(scale.shape[0], 1)
                if len(shape) == 2:
                    scale = scale.repeat(shape[0], axis=0)
                else:
                    scale = scale.repeat(shape[1] * shape[2] * shape[3], axis=1)
                scale = scale.reshape(shape)
            self._insert_func(var=weight, scale=scale, func="_soft_rounding")

    def _drop_quant_dequant(self, inputs, scale, weight_bits=8):
        x = paddle.static.data(
            shape=inputs.shape, dtype=inputs.dtype, name=inputs.name + '.tmp')
        bnt = (1 << (weight_bits - 1)) - 1
        scale = scale / bnt
        dequantized_tensor = paddle.round(x / scale) * scale
        quant_noise = x - dequantized_tensor
        random_noise = paddle.nn.functional.dropout(quant_noise, p=0.5)
        return x + random_noise
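
    # The sub-graph built by _drop_quant_dequant() above follows the QDrop
    # idea: quant_noise = x - dequant(quant(x)) is the per-element quantization
    # error, and dropout(quant_noise, p=0.5) randomly zeroes that error for
    # roughly half of the elements (Paddle's default dropout also rescales the
    # kept part), so during reconstruction each activation element randomly
    # sees either a near-quantized or the full-precision value.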

    def _insert_drop_quant_dequant(self):
        for op in self._graph.ops():
            if op.type() in ['conv2d', 'depthwise_conv2d', 'mul']:
                if op.type() in ['conv2d', 'depthwise_conv2d']:
                    if op.inputs("Filter")[0].name().startswith("teacher"):
                        break
                    else:
                        input = op.inputs("Input")[0]
                if op.type() in ['mul']:
                    if op.inputs("Y")[0].name().startswith("teacher"):
                        break
                    else:
                        input = op.inputs("X")[0]
                if input.name() in self._scale_dict.keys():
                    self._insert_func(
                        var=input,
                        scale=self._scale_dict[input.name()],
                        func="_drop_quant_dequant")

    def _insert_func(self, var, scale, func):
        program = var._graph.program
        ops = var.outputs()
        inputs = var._var
        startup_program = paddle.static.Program()
        new_program = paddle.static.Program()
        with paddle.static.program_guard(new_program, startup_program):
            if func == "_soft_rounding":
                out = self._soft_rounding(inputs, scale)
            elif func == "_drop_quant_dequant":
                out = self._drop_quant_dequant(inputs, scale)
        self._exe.run(startup_program)

        # create the new_program's vars in the original program
        for new_var in new_program.list_vars():
            if new_var.name == var._var.name + '_quant' or \
                    new_var.name == var._var.name + '.tmp':
                continue
            elif new_var.name == var._var.name + '.alpha':
                program.global_block().create_parameter(
                    name=new_var.name,
                    shape=new_var.shape,
                    dtype=new_var.dtype,
                    type=new_var.type,
                    stop_gradient=new_var.stop_gradient)
            elif new_var.name == var._var.name + '.scale':
                program.global_block().create_parameter(
                    name=new_var.name,
                    shape=new_var.shape,
                    dtype=new_var.dtype,
                    type=new_var.type,
                    stop_gradient=True,
                    trainable=False)
            else:
                if func == "_soft_rounding":
                    program.global_block().create_var(
                        name=new_var.name + '.rounding',
                        shape=new_var.shape,
                        dtype=new_var.dtype,
                        type=new_var.type,
                        persistable=new_var.persistable,
                        stop_gradient=new_var.stop_gradient)
                else:
                    program.global_block().create_var(
                        name=new_var.name,
                        shape=new_var.shape,
                        dtype=new_var.dtype,
                        type=new_var.type,
                        persistable=new_var.persistable,
                        stop_gradient=new_var.stop_gradient)

        op_list = new_program.global_block().ops
        op_list = list(reversed(op_list))
        block = var._var.block
        # prepend the new_program's ops into the original program
        for _op in ops:
            if _op.type() not in ['conv2d', 'depthwise_conv2d', 'mul']:
                continue
            idx = block.ops.index(_op._op)
            for op in op_list:
                # _attrs = op.all_attrs()
                _type = op.type
                _attrs = {'use_mkldnn': False, 'with_quant_attr': False}
                if _type == 'clip':
                    _attrs = {
                        'use_mkldnn': False,
                        'with_quant_attr': False,
                        'max': op.attr('max'),
                        'min': op.attr('min')
                    }
                elif _type == 'scale':
                    _attrs = {
                        'use_mkldnn': False,
                        'with_quant_attr': False,
                        'scale': op.attr('scale'),
                        'bias_after_scale': op.attr('bias_after_scale')
                    }
                elif _type == 'elementwise_mul':
                    _attrs = {
                        'use_mkldnn': False,
                        'with_quant_attr': False,
                        'Scale_out': op.attr('Scale_out'),
                        'Scale_x': op.attr('Scale_x'),
                        'Scale_y': op.attr('Scale_y'),
                        'axis': op.attr('axis')
                    }

                if func == "_soft_rounding":
                    _outputs = {'Out': op.output('Out')[0] + '.rounding'}
                    if _type == "elementwise_add":
                        _inputs = {
                            # replace the tmp var conv.weight_quant with the var conv.weight
                            'X': var._var,
                            'Y': op.input('Y')[0] + '.rounding',
                        }
                    elif _type == "elementwise_mul":
                        _inputs = {
                            'X': op.input('X')[0] + '.rounding',
                            'Y': op.input('Y')[0] + '.rounding',
                        }
                    elif (_type == 'scale' and
                          op.input('X')[0].endswith('scale')) or \
                            _type == 'sigmoid':
                        _inputs = {'X': op.input('X')[0]}
                    else:
                        _inputs = {'X': op.input('X')[0] + '.rounding'}
                elif func == "_drop_quant_dequant":
                    if _type == 'dropout':
                        _outputs = {
                            'Out': op.output('Out')[0],
                            'Mask': op.output('Mask')[0]
                        }
                    else:
                        _outputs = {'Out': op.output('Out')[0]}

                    if _type == 'elementwise_add' or _type == 'elementwise_sub':
                        _inputs = {
                            # replace the tmp var conv.weight_quant with the var conv.weight
                            'X': var._var,
                            'Y': op.input('Y'),
                        }
                    elif _type == 'scale' and \
                            op.input('X')[0] == inputs.name + '.tmp':
                        _inputs = {'X': var._var}
                    else:
                        _inputs = {'X': op.input('X')[0]}

                block._insert_op(
                    idx,
                    type=_type,
                    attrs=_attrs,
                    inputs=_inputs,
                    outputs=_outputs)

        for op in ops:
            if op.type() not in ['conv2d', 'depthwise_conv2d', 'mul']:
                continue
            if op.type() in ['conv2d', 'depthwise_conv2d'] and \
                    op.inputs('Filter')[0].name().startswith('teacher'):
                continue
            if op.type() in ['mul'] and \
                    op.inputs('Y')[0].name().startswith('teacher'):
                continue
            if func == '_soft_rounding':
                op._op._rename_input(inputs.name, out.name + '.rounding')
            else:
                op._op._rename_input(inputs.name, out.name)

    def _isolate_blocks(self):
        # Duplicate every block-input var through an `assign` op and stop its
        # gradient, so the reconstruction of one block does not back-propagate
        # into earlier blocks.
        starts = [block[0] for block in self._blocks]
        var2duplications = self._duplicate_vars(starts)
        for vars_ in var2duplications.values():
            for var_ in vars_:
                var_.stop_gradient = True

    def _duplicate_vars(self, var_names):
        result = {}
        for var_name in var_names:
            var = self._graph.var(var_name)
            result[var_name] = self._duplicate_var(var)
        return result

    def _duplicate_var(self, var):
        vars = []
        block = var._var.block
        index = 0
        for op in var.outputs():
            var_ = var._var
            op_ = op._op
            duplicated_var = block.create_var(
                name=var_.name + ".assign" + str(index),
                type=var_.type,
                shape=var_.shape,
                dtype=var_.dtype)
            vars.append(duplicated_var)
            index += 1
            idx = block.ops.index(op_)
            block._insert_op(
                idx,
                type="assign",
                inputs={"X": var_},
                outputs={"Out": duplicated_var})
            op_._rename_input(var_.name, duplicated_var.name)
        return vars

    def _update_weights_to_int(self):
        for weight_var_name in self._weight_var_names:
            alpha_tensor = utils.load_variable_data(self._scope,
                                                    weight_var_name + '.alpha')
            h_alpha_tensor = self._compute_soft_rounding_np(alpha_tensor)
            weight_quant_tensor = utils.load_variable_data(self._scope,
                                                           weight_var_name)
            utils.set_variable_data(
                self._scope, self._place, weight_var_name,
                np.round(weight_quant_tensor + h_alpha_tensor))

    def _bias_correction_w(self):
        for weight_var_name in self._weight_var_names:
            weight_var_tensor = utils.load_variable_data(
                self._scope, "teacher_" + weight_var_name)
            weight_quant_tensor = utils.load_variable_data(self._scope,
                                                           weight_var_name)
            scale = self._scale_dict[weight_var_name]
            final_weight_tensor = utils.bias_correction_w(
                weight_var_tensor,
                weight_quant_tensor,
                scale,
                quant_axis=0,
                weight_bits=8)
            utils.set_variable_data(self._scope, self._place, weight_var_name,
                                    final_weight_tensor)

    def _compute_soft_rounding_np(self, alpha_v):
        return np.clip(
            utils.stable_sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA,
            a_min=0,
            a_max=1)

    def _all_persistable_var_names(self):
        persistable_var_names = []
        for var in self._program.list_vars():
            if var.persistable:
                persistable_var_names.append(var.name)
        return persistable_var_names
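

# A rough usage sketch (illustrative only: `calib_loader`, `infer_program`,
# `feed_names`, `fetch_targets`, `op_weight_pairs` and `scales` are placeholder
# names, not part of this module; in this version the reconstruction is driven
# by the `_run()` method):
#
#     place = paddle.CPUPlace()
#     exe = paddle.static.Executor(place)
#     opt = RoundingOptimizer(
#         data_loader=calib_loader,
#         fp32_program=infer_program,
#         feed_list=feed_names,
#         fetch_list=fetch_targets,
#         exe=exe,
#         scope=paddle.static.global_scope(),
#         place=place,
#         quantized_op_pairs=op_weight_pairs,
#         weight_quantize_type='channel_wise_abs_max',
#         scale_dict=scales,
#         blocks=None,
#         block_weights_names=None,
#         round_type='adaround')
#     optimized_program = opt._run()
#
# With round_type='adaround' the blocks are derived automatically, so `blocks`
# and `block_weights_names` may be None; for 'brecq' or 'qdrop' they must be
# provided.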