Unverified commit 460b5c63, authored by gushiqiao, committed by GitHub

Fixed naming conflicts and fc layer quantization (#1494)

Parent 10b87911
@@ -29,7 +29,7 @@ from paddle.fluid.contrib.slim.quantization import utils
 from ..dist import merge
 from ..core.graph_wrapper import GraphWrapper
-from ..common import get_logger
+from ..common import get_logger, recover_program
 __all__ = ['ReconstructionQuantization', ]
@@ -75,7 +75,6 @@ class ReconstructionQuantization(PostTrainingQuantization):
         Load the FP32 model, and use the calibrate data to calculate the forward-stage.
         Based on the sample data, we can get the quantization information, and obtain
         the final quantized model.
-
         Args:
             None
         Returns:
@@ -156,6 +155,7 @@ class ReconstructionQuantization(PostTrainingQuantization):
             scope=self._scope,
             place=self._place,
             quantized_op_pairs=self._quantized_op_pairs,
+            weight_op_pairs=self._weight_op_pairs,
             weight_quantize_type=self._weight_quantize_type,
             activation_bits=self._activation_bits,
             weight_bits=self._weight_bits,
@@ -167,8 +167,13 @@ class ReconstructionQuantization(PostTrainingQuantization):
             num_iterations=self._batch_nums,
             lr=self._config['lr'],
             bias_correction=self._bias_correction,
-            epochs=self._config['epochs'], )
-        self._program = reconstruction_quanter._run()
+            epochs=self._config['epochs'])
+        self._program, self._scale_dict = reconstruction_quanter._run()
+
+        if self._algo in ["KL", "hist"]:
+            self._quantized_var_threshold = self._scale_dict
+        else:
+            self._quantized_threshold = self._scale_dict

     def _postprocessing(self):
         if self._algo is 'min_max':
@@ -211,6 +216,7 @@ class ReconstructionQuanter(object):
                  scope,
                  place,
                  quantized_op_pairs,
+                 weight_op_pairs,
                  weight_quantize_type,
                  activation_bits,
                  weight_bits,
@@ -227,7 +233,6 @@ class ReconstructionQuanter(object):
         '''
         Reconstruction Quanter, used to optimize the rounding policy
         by reconstructing the intermediate output.
-
         Args:
             data_loader(Python Generator, Paddle.io.DataLoader, optional): The
                 Generator or Dataloader provides calibrate data, and it could
@@ -284,6 +289,7 @@ class ReconstructionQuanter(object):
         self._scope = scope
         self._place = place
         self._quantized_op_pairs = quantized_op_pairs
+        self._weight_op_pairs = weight_op_pairs
         self._weight_var_names = list(self._quantized_op_pairs.keys())
         self._weight_quantize_type = weight_quantize_type
         self._scale_dict = scale_dict
@@ -323,6 +329,12 @@ class ReconstructionQuanter(object):
         return regions, region_weights_names

     def _preprocess(self):
+        for name in self._weight_var_names:
+            for i, s in enumerate(self._scale_dict[name]):
+                if s == 0.0:
+                    self._scale_dict[name][i] = 1e-8
+
         data_name_map = {}
         for name in self._feed_list:
             data_name_map[name] = name
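The zero-scale clamp added to `_preprocess` pairs with the `_dequant` change further down (hunk at `-433`), where the old `scale + 1e-8` guard is removed: the new in-graph `_quant` helper divides by the raw scale, so an all-zero channel would otherwise yield `inf`/`NaN`. A minimal numpy sketch of the failure mode, assuming symmetric 8-bit weight quantization:

```python
import numpy as np

bnt = (1 << (8 - 1)) - 1          # 127 for 8-bit weights
scales = np.array([0.0, 0.5])     # an all-zero channel produces a zero scale
w = np.array([0.0, 0.25])

# w / (scales / bnt)              # divide-by-zero warning, inf/nan results
scales[scales == 0.0] = 1e-8      # the clamp performed in _preprocess above
print(w / (scales / bnt))         # finite everywhere: [0., 63.5]
```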
@@ -335,17 +347,7 @@ class ReconstructionQuanter(object):
             teacher_scope=None,
             name_prefix="teacher_",
             merge_feed=True, )
-        for name in self._weight_var_names:
-            weight_np = utils.load_variable_data(self._scope, name)
-            scale = self._scale_dict[name]
-            weight_np_floor = np.floor(
-                utils.quant_tensor(
-                    x=weight_np, scale=scale, weight_bits=self._weight_bits))
-            utils.set_variable_data(
-                self._scope,
-                self._place,
-                name,
-                weight_np_floor, )
         self._graph = GraphWrapper(self._student_program)
         if self._simulate_activation_quant:
@@ -362,7 +364,8 @@ class ReconstructionQuanter(object):
             tmp_program = self._student_program.clone()
             quant_op_out_name = region_[1]
             with paddle.static.program_guard(tmp_program, startup_program):
-                loss_function = ReconstructionQuanterLoss(tmp_program, names)
+                loss_function = ReconstructionQuanterLoss(
+                    program=tmp_program, weight_region_names=names)
                 student_var = tmp_program.global_block().var(quant_op_out_name)
                 teacher_var = tmp_program.global_block().var("teacher_" +
                                                              quant_op_out_name)
@@ -382,11 +385,11 @@ class ReconstructionQuanter(object):
                 }
             optimizer = paddle.optimizer.Adam(learning_rate=self._lr)
             optimizer.minimize(total_loss)
             self._exe.run(startup_program)
             start_time = time.time()
             prev_start_time = start_time
             loader = self._data_loader()
             for epoch in range(self._epochs):
                 for i, data in (
                         enumerate(loader) if
@@ -412,14 +415,21 @@ class ReconstructionQuanter(object):
                     sys.stdout.flush()
                     if i == self._num_iterations:
                         break
+        self._update_scale()
         self._update_weights_to_int()
         if self._bias_correction:
             self._bias_correction_w()
-        return self._program
+        return self._program, self._scale_dict

     def _init_alpha(self, name, scale):
         _tensor = utils.load_variable_data(self._scope, "teacher_" + name)
-        tensor_scaled = utils.quant_tensor(_tensor, scale)
+        tensor_scaled = utils.quant_tensor(
+            x=_tensor,
+            scale=scale,
+            weight_bits=self._weight_bits,
+            quant_axis=0 if self._weight_op_pairs[name] not in
+            utils._channelwise_quant_axis1_ops else 1)
         tensor_floor = np.floor(tensor_scaled)
         tensor = tensor_scaled - tensor_floor
         alpha = -np.log((ZETA - GAMMA) / (tensor - GAMMA) - 1)
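`_init_alpha` inverts AdaRound's rectified sigmoid so that the learned soft-rounding variable starts exactly at each weight's fractional remainder, now quantized along the correct axis for fc weights as well. A quick numpy check of the inversion, assuming the module's usual AdaRound constants ZETA = 1.1 and GAMMA = -0.1:

```python
import numpy as np

ZETA, GAMMA = 1.1, -0.1  # AdaRound stretch constants (assumed to match the module)

rest = np.array([0.2, 0.5, 0.9])                      # tensor_scaled - floor(tensor_scaled)
alpha = -np.log((ZETA - GAMMA) / (rest - GAMMA) - 1)  # same formula as in _init_alpha
h_v = np.clip(1.0 / (1.0 + np.exp(-alpha)) * (ZETA - GAMMA) + GAMMA, 0, 1)
print(np.allclose(h_v, rest))                         # True: training starts at the remainder
```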
@@ -433,15 +443,20 @@ class ReconstructionQuanter(object):
         """
         bnt = (1 << (self._weight_bits - 1)) - 1

+        def _quant(x, scale):
+            s = scale / bnt
+            quant_x = x / s
+            return quant_x
+
         def _dequant(x, scale):
-            s = (scale + 1e-8) / bnt
+            s = scale / bnt
             dequant_x = s * x
             return dequant_x

-        quantized_weight = paddle.static.data(
+        weight_copy = paddle.static.data(
             shape=weight.shape,
             dtype=weight.dtype,
-            name=weight.name + '_quant', )
+            name=weight.name + '_copy', )

         v = paddle.static.create_parameter(
             shape=weight.shape,
@@ -461,10 +476,15 @@ class ReconstructionQuanter(object):
                 shape=weight.shape,
                 name=weight.name + '.scale',
                 default_initializer=fluid.initializer.NumpyArrayInitializer(
-                    scale, ), )
+                    scale, ))
         else:
             scale_var = scale
-        w = _dequant(quantized_weight + h_v, scale_var)
+
+        quantized_weight = _quant(weight_copy, scale_var)
+        floor_weight = (paddle.floor(quantized_weight) - quantized_weight
+                        ).detach() + quantized_weight
+        clip_weight = paddle.clip(floor_weight + h_v, -bnt, bnt)
+        w = _dequant(clip_weight, scale_var)
         return w

     def _insert_soft_rounding(self):
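The new in-graph rounding replaces the eager `np.floor` pass that `_preprocess` used to apply (removed in the hunk at `-335` above). `floor_weight` is a straight-through estimator: `paddle.floor` has zero gradient almost everywhere, so the detached difference makes the forward pass compute `floor(x)` while the backward pass sees the identity, letting the loss reach `h_v` and the scale. A dygraph sketch of the same trick:

```python
import paddle

x = paddle.to_tensor([1.3, 2.7], stop_gradient=False)
y = (paddle.floor(x) - x).detach() + x  # forward: floor(x); backward: identity
y.sum().backward()
print(y.numpy())       # [1., 2.]
print(x.grad.numpy())  # [1., 1.] -- gradient flows through despite floor()
```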
@@ -477,6 +497,7 @@ class ReconstructionQuanter(object):
             scale = np.array(scale)
             scale = scale.reshape(scale.shape[0], 1)
             if len(shape) == 2:
+                print(name)
                 scale = scale.repeat(shape[0], axis=1).T
             else:
                 scale = scale.repeat(shape[1] * shape[2] * shape[3], axis=1)
@@ -498,13 +519,14 @@ class ReconstructionQuanter(object):

     def _insert_drop_quant_dequant(self):
         for op in self._graph.ops():
-            if op.type() in ['conv2d', 'depthwise_conv2d', 'mul']:
+            if op.type(
+            ) in ['conv2d', 'depthwise_conv2d', 'mul', 'matmul', 'matmul_v2']:
                 if op.type() in ['conv2d', 'depthwise_conv2d']:
                     if op.inputs("Filter")[0].name().startswith("teacher"):
                         break
                     else:
                         input = op.inputs("Input")[0]
-                if op.type() in ['mul']:
+                if op.type() in ['mul', 'matmul', 'matmul_v2']:
                     if op.inputs("Y")[0].name().startswith("teacher"):
                         break
                     else:
@@ -529,7 +551,7 @@ class ReconstructionQuanter(object):
         self._exe.run(startup_program)
         # create var in program
         for new_var in new_program.list_vars():
-            if new_var.name == var._var.name + '_quant' or new_var.name == var._var.name + '.tmp':
+            if new_var.name == var._var.name + '_copy' or new_var.name == var._var.name + '.tmp':
                 continue
             elif new_var.name == var._var.name + '.alpha':
                 program.global_block().create_parameter(
@@ -537,14 +559,16 @@ class ReconstructionQuanter(object):
                     shape=new_var.shape,
                     dtype=new_var.dtype,
                     type=new_var.type,
-                    stop_gradient=new_var.stop_gradient, )
+                    stop_gradient=False,
+                    trainable=True)
             elif new_var.name == var._var.name + '.scale':
                 program.global_block().create_parameter(
                     name=new_var.name,
                     shape=new_var.shape,
                     dtype=new_var.dtype,
                     type=new_var.type,
-                    stop_gradient=True, )
+                    stop_gradient=True,
+                    trainable=False)
             else:
                 if func == "_soft_rounding":
                     program.global_block().create_var(
@@ -556,7 +580,7 @@ class ReconstructionQuanter(object):
                         stop_gradient=new_var.stop_gradient, )
                 else:
                     program.global_block().create_var(
-                        name=new_var.name,
+                        name=new_var.name + '.qdrop',
                         shape=new_var.shape,
                         dtype=new_var.dtype,
                         type=new_var.type,
@@ -567,11 +591,12 @@ class ReconstructionQuanter(object):
         block = var._var.block
         # prepend new_program's op in program
         for _op in ops:
-            if _op.type() not in ['conv2d', 'depthwise_conv2d', 'mul']:
+            if _op.type() not in [
+                    'conv2d', 'depthwise_conv2d', 'mul', 'matmul', 'matmul_v2'
+            ]:
                 continue
             idx = block.ops.index(_op._op)
             for op in op_list:
-                # _attrs = op.all_attrs()
                 _type = op.type
                 _attrs = {
                     'use_mkldnn': False,
@@ -591,7 +616,7 @@ class ReconstructionQuanter(object):
                         'scale': op.attr('scale'),
                         'bias_after_scale': op.attr('bias_after_scale'),
                     }
-                elif _type == 'elementwise_mul':
+                elif _type in ['elementwise_mul', 'elementwise_div']:
                     _attrs = {
                         'use_mkldnn': False,
                         'with_quant_attr': False,
@@ -603,15 +628,17 @@ class ReconstructionQuanter(object):
                 if func == "_soft_rounding":
                     _outputs = {'Out': op.output('Out')[0] + '.rounding'}
-                    if _type == "elementwise_add":
+                    if _type in [
+                            "elementwise_add", "elementwise_sub",
+                            "elementwise_mul"
+                    ]:
                         _inputs = {
-                            'X': var.
-                            _var,  # replace tmp var conv.weight_quant with var conv.weight
+                            'X': op.input('X')[0] + '.rounding',
                             'Y': op.input('Y')[0] + '.rounding',
                         }
-                    elif _type == "elementwise_mul":
+                    elif _type == "elementwise_div":
                         _inputs = {
-                            'X': op.input('X')[0] + '.rounding',
+                            'X': var._var,
                             'Y': op.input('Y')[0] + '.rounding',
                         }
                 elif (_type == 'scale' and
@@ -623,23 +650,22 @@ class ReconstructionQuanter(object):
                 elif func == "_drop_quant_dequant":
                     if _type == 'dropout':
                         _outputs = {
-                            'Out': op.output('Out')[0],
-                            'Mask': op.output('Mask')[0],
+                            'Out': op.output('Out')[0] + '.qdrop',
+                            'Mask': op.output('Mask')[0] + '.qdrop',
                         }
                     else:
-                        _outputs = {'Out': op.output('Out')[0]}
+                        _outputs = {'Out': op.output('Out')[0] + '.qdrop'}

                     if _type == 'elementwise_add' or _type == 'elementwise_sub':
                         _inputs = {
-                            'X': var.
-                            _var,  # replace tmp var conv.weight_quant with var conv.weight
-                            'Y': op.input('Y'),
+                            'X': var._var,
+                            'Y': op.input('Y')[0] + '.qdrop',
                         }
                     elif _type == 'scale' and op.input('X')[
                             0] == inputs.name + '.tmp':
                         _inputs = {'X': var._var}
                     else:
-                        _inputs = {'X': op.input('X')[0]}
+                        _inputs = {'X': op.input('X')[0] + '.qdrop'}

                 block._insert_op(
                     idx,
@@ -648,18 +674,20 @@ class ReconstructionQuanter(object):
                     inputs=_inputs,
                     outputs=_outputs, )
         for op in ops:
-            if op.type() not in ['conv2d', 'depthwise_conv2d', 'mul']:
+            if op.type() not in [
+                    'conv2d', 'depthwise_conv2d', 'mul', 'matmul', 'matmul_v2'
+            ]:
                 continue
             if op.type() in ['conv2d', 'depthwise_conv2d'] and op.inputs(
                     'Filter')[0].name().startswith('teacher'):
                 continue
-            if op.type() in ['mul'] and op.inputs('Y')[0].name().startswith(
-                    'teacher'):
+            if op.type() in ['mul', 'matmul', 'matmul_v2'] and op.inputs('Y')[
+                    0].name().startswith('teacher'):
                 continue
             if func == '_soft_rounding':
                 op._op._rename_input(inputs.name, out.name + '.rounding')
             else:
-                op._op._rename_input(inputs.name, out.name)
+                op._op._rename_input(inputs.name, out.name + '.qdrop')

     def _isolate_regions(self):
         starts = [region[0] for region in self._regions]
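The `.rounding` and `.qdrop` suffixes are the fix for the naming conflicts in the commit title: the soft-rounding and drop-quant-dequant subgraphs previously reused the original output names when both were spliced into the student program. A minimal sketch of the convention, assuming name uniqueness within a `Block` is the constraint being protected:

```python
import paddle

paddle.enable_static()
prog = paddle.static.Program()
block = prog.global_block()
block.create_var(name='conv1.w', shape=[2, 2], dtype='float32')
# Suffixed copies keep the two inserted subgraphs from clashing on 'conv1.w'.
for suffix in ('.rounding', '.qdrop'):
    block.create_var(name='conv1.w' + suffix, shape=[2, 2], dtype='float32')
print(sorted(v.name for v in prog.list_vars()))
# ['conv1.w', 'conv1.w.qdrop', 'conv1.w.rounding']
```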
@@ -698,20 +726,41 @@ class ReconstructionQuanter(object):
                 op_._rename_input(var_.name, duplicated_var.name)
         return vars

+    def _update_scale(self):
+        for _name in self._weight_var_names:
+            scale_name = _name + '.scale'
+            scale_tensor = utils.load_variable_data(self._scope, scale_name)
+            scale_list = []
+            if self._weight_op_pairs[
+                    _name] in utils._channelwise_quant_axis1_ops:
+                scale_list = list(scale_tensor[0])
+            else:
+                for i in range(scale_tensor.shape[0]):
+                    scale_list.append(scale_tensor[i][0][0][0])
+            self._scale_dict[scale_name] = scale_list
+
     def _update_weights_to_int(self):
         for weight_var_name in self._weight_var_names:
             alpha_tensor = utils.load_variable_data(
                 self._scope,
                 weight_var_name + '.alpha', )
             h_alpha_tensor = self._compute_soft_rounding_np(alpha_tensor)
-            weight_quant_tensor = utils.load_variable_data(
+            weight_tensor = utils.load_variable_data(
                 self._scope,
                 weight_var_name, )
+            weight_quant_tensor = utils.quant_tensor(
+                x=weight_tensor,
+                scale=self._scale_dict[weight_var_name],
+                weight_bits=self._weight_bits,
+                quant_axis=0 if self._weight_op_pairs[weight_var_name] not in
+                utils._channelwise_quant_axis1_ops else 1)
             utils.set_variable_data(
                 self._scope,
                 self._place,
                 weight_var_name,
-                np.round(weight_quant_tensor + h_alpha_tensor, ), )
+                np.floor(weight_quant_tensor) + h_alpha_tensor, )

     def _bias_correction_w(self):
         for weight_var_name in self._weight_var_names:
@@ -726,7 +775,8 @@ class ReconstructionQuanter(object):
                 weight_var_tensor,
                 weight_quant_tensor,
                 scale,
-                quant_axis=0,
+                quant_axis=0 if self._weight_op_pairs[weight_var_name] not in
+                utils._channelwise_quant_axis1_ops else 1,
                 weight_bits=self._weight_bits, )
             utils.set_variable_data(
                 self._scope,
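The repeated `quant_axis` expression is the fc-layer fix itself: conv weights are laid out `[out_c, in_c, kh, kw]` and are quantized per channel along axis 0, whereas `mul`/`matmul`/`matmul_v2` weights are `[in_features, out_features]`, putting output channels on axis 1 (the ops collected in `utils._channelwise_quant_axis1_ops`). A numpy sketch of per-channel quantization along either axis, using a simplified stand-in for `utils.quant_tensor`:

```python
import numpy as np

def quant_tensor(x, scale, quant_axis, weight_bits=8):
    # Simplified per-channel symmetric quantization (assumption: mirrors the
    # channel-wise behaviour of utils.quant_tensor).
    bnt = (1 << (weight_bits - 1)) - 1
    shape = [1] * x.ndim
    shape[quant_axis] = -1
    return x / np.asarray(scale).reshape(shape) * bnt

conv_w = np.random.randn(8, 4, 3, 3)  # conv2d/depthwise_conv2d: out_c on axis 0
fc_w = np.random.randn(4, 8)          # mul/matmul/matmul_v2: out_c on axis 1
q_conv = quant_tensor(conv_w, np.abs(conv_w).max(axis=(1, 2, 3)), quant_axis=0)
q_fc = quant_tensor(fc_w, np.abs(fc_w).max(axis=0), quant_axis=1)
print(np.abs(q_conv).max(), np.abs(q_fc).max())  # both hit the int8 bound, 127.0
```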
@@ -758,7 +808,6 @@ class ReconstructionQuanterLoss(object):
                  weight=0.1):
         """
         The loss function of Rounding Optimizer.
-
         Args:
             program(Program): The student program.
             weight_region_names(list, optional): The weight names inside a region.
@@ -829,9 +878,8 @@ def quant_recon_static(executor,
                        hist_percent=0.9999,
                        bias_correction=False,
                        quantizable_op_type=[
-                           "conv2d",
-                           "depthwise_conv2d",
-                           "mul",
+                           "conv2d", "depthwise_conv2d", "mul", "matmul",
+                           "matmul_v2"
                        ],
                        is_full_quantize=False,
                        weight_bits=8,
@@ -853,7 +901,6 @@ def quant_recon_static(executor,
     quantize the fp32 model. It uses calibrate data to calculate the
     scale factor of quantized variables, and inserts fake quantization
     and dequantization operators to obtain the quantized model.
-
     Args:
         executor(paddle.static.Executor): The executor to load, run and save the
             quantized model.
......
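With `matmul`/`matmul_v2` in the default `quantizable_op_type`, fc layers are now covered out of the box. A hypothetical call sketch — the import path, model paths, and calibrate-data generator below are placeholders, and argument names other than those visible in the signature above are assumptions based on PaddleSlim's post-training-quantization API:

```python
import numpy as np
import paddle
from paddleslim.quant import quant_recon_static  # import path assumed

def sample_generator():
    for _ in range(8):                 # a few random calibration batches
        yield [np.random.randn(1, 3, 224, 224).astype('float32')]

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())
quant_recon_static(
    executor=exe,
    model_dir='./fp32_model',          # placeholder paths
    quantize_model_path='./quant_model',
    sample_generator=sample_generator,
    batch_nums=8,
    quantizable_op_type=[
        "conv2d", "depthwise_conv2d", "mul", "matmul", "matmul_v2"
    ])
```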