From b47478efc2caf9247fd73245a3b87154ec3e81a1 Mon Sep 17 00:00:00 2001 From: cc <52520497+juncaipeng@users.noreply.github.com> Date: Fri, 26 Mar 2021 19:15:56 +0800 Subject: [PATCH] [dygraph qat] Use layer to calculate output scale (#31861) * Use layer to calculate output scale * add backward for moving_average_abs_max_scale and save output scales to op's attr --- paddle/fluid/operators/fake_quantize_op.cc | 69 +++-- paddle/fluid/operators/fake_quantize_op.cu | 4 +- paddle/fluid/operators/fake_quantize_op.h | 16 +- paddle/fluid/pybind/op_function_generator.cc | 6 +- .../slim/quantization/imperative/qat.py | 268 +++++------------- .../slim/quantization/imperative/quant_nn.py | 111 ++++---- .../slim/quantization/imperative/utils.py | 47 +-- .../slim/tests/test_imperative_out_scale.py | 25 -- .../tests/unittests/test_fake_quantize_op.py | 5 +- 9 files changed, 222 insertions(+), 329 deletions(-) diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index abfc88e515..4544386718 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -649,13 +649,18 @@ class MovingAverageAbsMaxScaleOp : public framework::OperatorWithKernel { "MovingAverageAbsMaxScale"); OP_INOUT_CHECK(ctx->HasOutput("OutScale"), "Output", "OutScale", "MovingAverageAbsMaxScale"); + if (ctx->HasOutput("OutState")) { ctx->SetOutputDim("OutState", {1}); } if (ctx->HasOutput("OutAccum")) { ctx->SetOutputDim("OutAccum", {1}); } - ctx->SetOutputDim("OutScale", {1}); + if (ctx->HasOutput("Out")) { + ctx->SetOutputDim("Out", ctx->GetInputDim("X")); + ctx->SetOutputDim("OutScale", {1}); + ctx->ShareLoD("X", /*->*/ "Out"); + } } protected: @@ -673,6 +678,9 @@ class MovingAverageAbsMaxScaleOpMaker AddInput("X", "(Tensor) Input is float data type."); AddInput("InAccum", "Last accum.").AsDispensable(); AddInput("InState", "Last state.").AsDispensable(); + AddOutput("Out", + "(Tensor) Output tensor is just equivalent to the input tensor.") + .AsDispensable(); AddOutput("OutScale", " Current scale"); AddOutput("OutState", "(Tensor) state buffer.").AsDispensable(); AddOutput("OutAccum", "(Tensor) accum buffer.").AsDispensable(); @@ -693,7 +701,7 @@ $$Out = X$$ } }; -class FakeQuantDequantGradOp : public framework::OperatorWithKernel { +class StrightThroughEstimatorGradOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; @@ -701,9 +709,9 @@ class FakeQuantDequantGradOp : public framework::OperatorWithKernel { auto out_grad_name = framework::GradVarName("Out"); auto x_grad_name = framework::GradVarName("X"); OP_INOUT_CHECK(ctx->HasInput(out_grad_name), "Input", out_grad_name, - "FakeQuantDequantGradOp"); + "StrightThroughEstimatorGradOp"); OP_INOUT_CHECK(ctx->HasOutput(x_grad_name), "Output", x_grad_name, - "FakeQuantDequantGradOp"); + "StrightThroughEstimatorGradOp"); ctx->SetOutputDim(x_grad_name, ctx->GetInputDim(out_grad_name)); } @@ -717,13 +725,13 @@ class FakeQuantDequantGradOp : public framework::OperatorWithKernel { }; template -class FakeQuantDequantGradMaker : public framework::SingleGradOpMaker { +class StrightThroughEstimatorMaker : public framework::SingleGradOpMaker { public: using framework::SingleGradOpMaker::SingleGradOpMaker; protected: void Apply(GradOpPtr grad_op) const override { - grad_op->SetType("fake_quantize_dequantize_grad"); + grad_op->SetType("stright_throuth_estimator_grad"); grad_op->SetInput(framework::GradVarName("Out"), this->OutputGrad("Out")); grad_op->SetOutput(framework::GradVarName("X"), this->InputGrad("X")); grad_op->SetAttrMap(this->Attrs()); @@ -744,11 +752,11 @@ REGISTER_OPERATOR( REGISTER_OP_CPU_KERNEL(fake_quantize_abs_max, ops::FakeQuantizeAbsMaxKernel); -REGISTER_OPERATOR(fake_quantize_dequantize_abs_max, - ops::FakeQuantOrWithDequantAbsMaxOp, - ops::FakeQuantOrWithDequantAbsMaxOpMaker, - ops::FakeQuantDequantGradMaker, - ops::FakeQuantDequantGradMaker); +REGISTER_OPERATOR( + fake_quantize_dequantize_abs_max, ops::FakeQuantOrWithDequantAbsMaxOp, + ops::FakeQuantOrWithDequantAbsMaxOpMaker, + ops::StrightThroughEstimatorMaker, + ops::StrightThroughEstimatorMaker); REGISTER_OP_CPU_KERNEL(fake_quantize_dequantize_abs_max, ops::FakeQuantizeDequantizeAbsMaxKernel); @@ -769,11 +777,12 @@ REGISTER_OPERATOR( REGISTER_OP_CPU_KERNEL(fake_quantize_moving_average_abs_max, ops::FakeQuantizeMovingAverageAbsMaxKernel); -REGISTER_OPERATOR(fake_quantize_dequantize_moving_average_abs_max, - ops::FakeQuantOrWithDequantMovingAverageAbsMaxOp, - ops::FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker, - ops::FakeQuantDequantGradMaker, - ops::FakeQuantDequantGradMaker); +REGISTER_OPERATOR( + fake_quantize_dequantize_moving_average_abs_max, + ops::FakeQuantOrWithDequantMovingAverageAbsMaxOp, + ops::FakeQuantOrWithDequantMovingAverageAbsMaxOpMaker, + ops::StrightThroughEstimatorMaker, + ops::StrightThroughEstimatorMaker); REGISTER_OP_CPU_KERNEL( fake_quantize_dequantize_moving_average_abs_max, ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel); @@ -789,20 +798,22 @@ REGISTER_OP_CPU_KERNEL(fake_channel_wise_quantize_abs_max, REGISTER_OPERATOR( moving_average_abs_max_scale, ops::MovingAverageAbsMaxScaleOp, ops::MovingAverageAbsMaxScaleOpMaker, - paddle::framework::EmptyGradOpMaker, - paddle::framework::EmptyGradOpMaker); + ops::StrightThroughEstimatorMaker, + ops::StrightThroughEstimatorMaker); REGISTER_OP_CPU_KERNEL(moving_average_abs_max_scale, ops::MovingAverageAbsMaxScaleKernel); -REGISTER_OPERATOR(fake_quantize_dequantize_grad, ops::FakeQuantDequantGradOp); -REGISTER_OP_CPU_KERNEL(fake_quantize_dequantize_grad, - ops::FakeQuantDequantGradKernel); +REGISTER_OPERATOR(stright_throuth_estimator_grad, + ops::StrightThroughEstimatorGradOp); +REGISTER_OP_CPU_KERNEL(stright_throuth_estimator_grad, + ops::StrightThroughEstimatorGradKernel); -REGISTER_OPERATOR(fake_channel_wise_quantize_dequantize_abs_max, - ops::FakeChannelWiseQuantizeDequantizeAbsMaxOp, - ops::FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker, - ops::FakeQuantDequantGradMaker, - ops::FakeQuantDequantGradMaker); +REGISTER_OPERATOR( + fake_channel_wise_quantize_dequantize_abs_max, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxOp, + ops::FakeChannelWiseQuantizeDequantizeAbsMaxOpMaker, + ops::StrightThroughEstimatorMaker, + ops::StrightThroughEstimatorMaker); REGISTER_OP_CPU_KERNEL( fake_channel_wise_quantize_dequantize_abs_max, ops::FakeChannelWiseQuantizeDequantizeAbsMaxKernel); @@ -820,4 +831,8 @@ REGISTER_OP_VERSION(moving_average_abs_max_scale) "Out", "Delete output in order to make the inference model not " "save moving_average_abs_max_scale operator. This will " - "make the quantitative model be correctly applied in inference.")); + "make the quantitative model be correctly applied in inference.")) + .AddCheckpoint( + R"ROC(Incompatible upgrade of output [Out])ROC", + paddle::framework::compatible::OpVersionDesc().NewOutput( + "Out", "In order to support dygraph qat, add output again.")); diff --git a/paddle/fluid/operators/fake_quantize_op.cu b/paddle/fluid/operators/fake_quantize_op.cu index 92127f9aeb..78052179f6 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu +++ b/paddle/fluid/operators/fake_quantize_op.cu @@ -543,8 +543,8 @@ REGISTER_OP_CUDA_KERNEL(moving_average_abs_max_scale, REGISTER_OP_CUDA_KERNEL( fake_quantize_dequantize_moving_average_abs_max, ops::FakeQuantizeDequantizeMovingAverageAbsMaxKernel); -REGISTER_OP_CUDA_KERNEL(fake_quantize_dequantize_grad, - ops::FakeQuantDequantGradKernel); +REGISTER_OP_CUDA_KERNEL(stright_throuth_estimator_grad, + ops::StrightThroughEstimatorGradKernel); REGISTER_OP_CUDA_KERNEL( fake_channel_wise_quantize_dequantize_abs_max, ops::FakeChannelWiseQuantizeDequantizeAbsMaxKernel); diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index 94a75f930b..11a2d2de8b 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -314,6 +314,12 @@ class MovingAverageAbsMaxScaleKernel : public framework::OpKernel { auto* in = context.Input("X"); auto& dev_ctx = context.template device_context(); + if (context.HasOutput("Out")) { + auto* out = context.Output("Out"); + out->mutable_data(context.GetPlace()); + framework::TensorCopy(*in, context.GetPlace(), dev_ctx, out); + } + bool is_test = context.Attr("is_test"); // testing if (is_test) { @@ -344,17 +350,17 @@ class MovingAverageAbsMaxScaleKernel : public framework::OpKernel { }; template -class FakeQuantDequantGradKernel : public framework::OpKernel { +class StrightThroughEstimatorGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto* d_out = context.Input(framework::GradVarName("Out")); auto x_grad_name = framework::GradVarName("X"); auto* d_x = context.Output(x_grad_name); - PADDLE_ENFORCE_NOT_NULL( - d_x, platform::errors::PreconditionNotMet( - "FakeQuantDequantGradOp doesn't have the output named %s.", - x_grad_name)); + PADDLE_ENFORCE_NOT_NULL(d_x, platform::errors::PreconditionNotMet( + "StrightThroughEstimatorGradKernel " + "doesn't have the output named %s.", + x_grad_name)); // Initialize dx as same as d_out d_x->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/pybind/op_function_generator.cc b/paddle/fluid/pybind/op_function_generator.cc index b1c42d91df..69856fa4fa 100644 --- a/paddle/fluid/pybind/op_function_generator.cc +++ b/paddle/fluid/pybind/op_function_generator.cc @@ -84,7 +84,8 @@ std::map> op_outs_map = { {"matrix_nms", {"Out", "Index", "RoisNum"}}, {"distribute_fpn_proposals", {"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}}, - {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}}, + {"moving_average_abs_max_scale", + {"Out", "OutScale", "OutAccum", "OutState"}}, {"multiclass_nms3", {"Out", "NmsRoisNum"}}, {"generate_proposals_v2", {"RpnRois", "RpnRoiProbs", "RpnRoisNum"}}, {"momentum", {"ParamOut", "VelocityOut"}}, @@ -137,7 +138,8 @@ std::map> op_passing_outs_map = { {"check_finite_and_unscale", {"Out", "FoundInfinite"}}, {"update_loss_scaling", {"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}}, - {"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}}, + {"moving_average_abs_max_scale", + {"Out", "OutScale", "OutAccum", "OutState"}}, {"lamb", {"ParamOut", "Moment1Out", "Moment2Out", "Beta1PowOut", "Beta2PowOut"}}, {"rnn", {"DropoutState"}}, diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index ea2e8e073b..f4620ff000 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -21,14 +21,14 @@ import warnings import paddle from paddle.fluid import dygraph, core, framework, unique_name -from paddle.fluid.executor import Executor +from paddle.fluid.executor import Executor, global_scope from paddle.fluid.param_attr import ParamAttr from paddle.fluid.initializer import Constant from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.io import load_inference_model, save_inference_model from paddle.fluid.log_helper import get_logger -from . import quant_nn from .. import quantization_pass +from . import quant_nn from . import utils __all__ = ['ImperativeQuantAware'] @@ -201,7 +201,7 @@ class ImperativeQuantAware(object): self._quantize_inputs = ImperativeQuantizeInputs(**kwargs) - self._calc_output_scale = ImperativeCalcOutputScale() + self._quantize_outputs = ImperativeQuantizeOutputs() def quantize(self, model): """ @@ -219,11 +219,11 @@ class ImperativeQuantAware(object): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." self._quantize_inputs.apply(model) - self._calc_output_scale.apply(model) + self._quantize_outputs.apply(model) def save_quantized_model(self, layer, path, input_spec=None, **config): - self._calc_output_scale.save_quantized_model(layer, path, input_spec, - **config) + self._quantize_outputs.save_quantized_model(layer, path, input_spec, + **config) class ImperativeQuantizeInputs(object): @@ -323,10 +323,10 @@ class ImperativeQuantizeInputs(object): idx += 1 target = name[last_idx:idx] - quant_layer = self._get_quantized_layer(layer) + quant_layer = self._get_input_quantized_layer(layer) setattr(obj, target, quant_layer) - def _get_quantized_layer(self, layer): + def _get_input_quantized_layer(self, layer): quant_layer_name = None for key, value in utils.quant_input_layers_map.items(): if isinstance(layer, value): @@ -343,24 +343,26 @@ class ImperativeQuantizeInputs(object): return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs) -class ImperativeCalcOutputScale(object): +class ImperativeQuantizeOutputs(object): + """ + Calculate the output scales for some layers. + """ + def __init__(self, moving_rate=0.9): """ - Add the logic of calculating and setting output scales of some layers. + The constructor for ImperativeQuantizeOutputs. Args: moving_rate(float): The decay coefficient of moving average. The default value is 0.9. """ - super(ImperativeCalcOutputScale, self).__init__() + super(ImperativeQuantizeOutputs, self).__init__() self._moving_rate = moving_rate - self._register_hook_handle_list = [] - self._out_scale_dict = collections.OrderedDict() def apply(self, model): """ - Insert the `moving_average_abs_max_scale` op to calculate output - scale of specific layers in model. + Insert the `moving_average_abs_max_scale` layers to calculate the + output scales for specific layers in the dygraph model. Args: model(fluid.dygraph.Layer): The target model which would be @@ -372,14 +374,25 @@ class ImperativeCalcOutputScale(object): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." - # Calculate the target ops's output scale, and don't consider - # the skip_quant attr - for _, layer in model.named_sublayers(): - if self._is_target_layer(layer): - self._init_scale_params(layer) - hook_handle = layer.register_forward_post_hook( - self._calc_output_scale_hook) - self._register_hook_handle_list.append(hook_handle) + for name, layer in model.named_sublayers(): + if not self._is_target_layer(layer): + continue + + # TODO(jc): optimize this module + last_idx = 0 + idx = 0 + obj = model + while idx < len(name): + if (name[idx] == '.'): + if hasattr(obj, name[last_idx:idx]): + obj = getattr(obj, name[last_idx:idx]) + last_idx = idx + 1 + idx += 1 + target = name[last_idx:idx] + + quant_layer = quant_nn.__dict__["QuantizedOutputLayer"]( + layer, self._moving_rate) + setattr(obj, target, quant_layer) def save_quantized_model(self, layer, path, input_spec=None, **config): """ @@ -409,33 +422,18 @@ class ImperativeCalcOutputScale(object): Returns: None """ - assert isinstance(layer, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." - self._gather_output_scale(layer) - - with dygraph.guard(): - layer.eval() - for handle in self._register_hook_handle_list: - handle.remove() paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config) - if len(self._out_scale_dict) == 0: - warnings.warn("Warning: No Layer of the model while to be " \ - "saved contains the out_threshold attribute, so the " \ - "generated inference model would not contain the " \ - "out_threshold.") - return - - # load static model is_dynamic_mode = False if paddle.in_dynamic_mode(): is_dynamic_mode = True paddle.enable_static() - place = core.CUDAPlace(0) if core.is_compiled_with_cuda() \ - else core.CPUPlace() + place = core.CPUPlace() + scope = global_scope() exe = Executor(place) dirname = os.path.dirname(path) @@ -450,20 +448,10 @@ class ImperativeCalcOutputScale(object): model_filename=model_filename, params_filename=params_filename)) - # TODO(jc): analyse whether the dygraph model has - # several blocks before applying qat - assert infer_program.num_blocks == 1, \ - "Quantization aware training (QAT) requires the program " \ - "only has a block for now. When the model has if-else or " \ - "while, the program will have several blocks." + self._save_output_scale(infer_program, scope) - # set output scales to the static model - self._save_output_scale(infer_program) - - # process skip quant self._set_skip_quant_attr(infer_program) - # save the final quantized model that has output scales save_inference_model( dirname=dirname, feeded_var_names=feed_target_names, @@ -476,144 +464,42 @@ class ImperativeCalcOutputScale(object): if is_dynamic_mode: paddle.disable_static() - def _gather_output_scale(self, layer): - """ - Gather all output scales to self._out_scale_dict - """ - with dygraph.guard(): - layer.eval() - for _, sub_layer in layer.named_sublayers(): - if self._is_target_layer(sub_layer): - layer_name = sub_layer.full_name() - if hasattr(sub_layer, "_quant_out_scale"): - self._out_scale_dict[layer_name] = float( - sub_layer._quant_out_scale) - - def _save_output_scale(self, infer_program): + def _is_target_layer(self, layer): """ - Save all output scales to the corresponding ops in static - inference program. - - Because the Layer in dygraph may correspond to multiple ops - in static program after being saved. To ensure correctness, - the outscale collected for output of dygraph Layer can only - be set to the last op in the corresponding ops in static program. + Whether the layer needs to calculate output scales. """ - assert infer_program.num_blocks == 1, \ - "The inference program should only have a block." - - global_block = infer_program.global_block() - target_ops = global_block.ops - - scale_idx = 0 - op_idx = 0 - attr_name = "out_threshold" - - for scale_name, scale_value in self._out_scale_dict.items(): - while True: - if op_idx >= len(target_ops): - break - - op = target_ops[op_idx] - if not self._is_scale_op_matched(scale_name, op, global_block): - op_idx += 1 - else: - if op.type in utils.weight_op_types \ - and op_idx + 1 < len(target_ops) \ - and target_ops[op_idx+1].type == "elementwise_add": - target_ops[op_idx + 1]._set_attr(attr_name, scale_value) - op_idx += 2 - else: - op._set_attr(attr_name, scale_value) - op_idx += 1 - scale_idx += 1 - break - - if scale_idx != len(self._out_scale_dict): - _logger.warning("Warning: the model have %s output scales, "\ - "but it only saves %s output scales." \ - % (len(self._out_scale_dict), scale_idx)) - - def _is_target_layer(self, layer): return isinstance(layer, tuple(utils.quant_output_layers_map.values())) \ - or ('quantized_' in layer.full_name() and \ + or ('quantized' in layer.full_name() and \ 'quantized_noweight' not in layer.full_name()) - def _init_scale_params(self, layer, name=None): + def _save_output_scale(self, program, scope): """ - Init the scale params for calculating output scales and save them in the - target layer. - After the users define the dygraph model, the hooks for calculating output - scales will not execute immediately. If the users load parameters form - checkpoint and save the quantized inference model immediately, the inference - model would not be saved successfully. Beacuse the dygraph_to_static requires - that the parameters created in __init__, but the uniqueness of hook make it - impossible to create parameters in __init__. To avoid this mistake, we define - the scale parameters in the beginning instead of hook. + Save all output scales to the corresponding ops in static + inference program and delete 'moving_average_abs_max_scale' ops. """ + for block in program.blocks: + for op in block.ops: + if op.type == "moving_average_abs_max_scale": + in_var_name = op.input('X')[0] + out_var_name = op.output('Out')[0] + out_scale_name = op.output('OutScale')[0] - def _create_param(in_layer, first_name, last_name, dtype): - prefix = '{}.{}'.format(first_name, last_name) \ - if first_name else 'outscale.{}'.format(last_name) - attr = ParamAttr( - name=unique_name.generate(prefix), - initializer=Constant(1), - trainable=False) - param = in_layer.create_parameter(shape=[1], attr=attr, dtype=dtype) - return param - - dtype = layer._dtype if layer._dtype is not None else "float32" - if dtype not in ["float32", "float64"]: - return - - layer._quant_out_scale = _create_param(layer, name, "scale", dtype) - layer._quant_out_scale.stop_gradient = True - - layer._quant_out_state = _create_param(layer, name, "state", dtype) - layer._quant_out_state.stop_gradient = True + out_scale = utils.load_variable_data(scope, out_scale_name) + previous_op = utils.find_previous_op(block, in_var_name) + previous_op._set_attr("out_threshold", float(out_scale)) - layer._quant_out_accum = _create_param(layer, name, "accum", dtype) - layer._quant_out_accum.stop_gradient = True + next_ops = utils.find_next_ops(block, out_var_name) + for next_op in next_ops: + next_op._rename_input(out_var_name, in_var_name) - def _is_scale_op_matched(self, scale_name, op, block): + def _set_skip_quant_attr(self, program): """ - Based on the op name and attrs to judge whether the op in - program matches the scale_name. We must know the corresponding - name between dgraph and static model. + Label the skip quantized ops. """ - fp_type = [core.VarDesc.VarType.FP64, core.VarDesc.VarType.FP32] - if op.type in quantization_pass._op_real_in_out_name.keys(): - output_var_names = quantization_pass._get_op_output_var_names(op) - for output_var_name in output_var_names: - output_var_tensor = block.var(output_var_name) - if output_var_tensor.dtype not in fp_type: - return False - - # corresponding_map: [name, op_types, function] - # Note that, the items have priority in corresponding_map - corresponding_map = [ - ['conv2d_tranpose', ['conv2d_transpose', \ - 'depthwise_conv2d_transpose'], None], - ['conv2d', ['conv2d', 'depthwise_conv2d'], None], - ['linear', ['matmul'], None], - ['re_lu6', ['relu6'], None], - ['p_re_lu', ['prelu'], None], - ['leaky_re_lu', ['leaky_relu'], None], - ['re_lu', ['relu'], None], - ] - - for item in corresponding_map: - if item[0] in scale_name: - return (op.type in item[1]) and \ - (len(item) == 2 or item[2] is None or item[2](op)) - - return op.type in scale_name - - def _set_skip_quant_attr(self, program): - block = program.global_block() - for op in block.ops: - if self._is_skip_quant_op(block, op): - op._set_attr("skip_quant", True) + for block in program.blocks: + for op in block.ops: + if self._is_skip_quant_op(block, op): + op._set_attr("skip_quant", True) def _is_skip_quant_op(self, block, in_op): """ @@ -621,33 +507,11 @@ class ImperativeCalcOutputScale(object): 1. the type of input op should be conv2d, depthwise_conv2d or matmul 2. the previous ops of the input op are not fake_quantize_dequantize ops """ - - def _find_previous_op(block, var_name): - for op in block.ops: - if var_name in op.output_arg_names: - return op - target_op_types = ["conv2d", "depthwise_conv2d", "matmul"] if in_op.type not in target_op_types: return False - previous_ops = [_find_previous_op(block, arg_name) \ + previous_ops = [utils.find_previous_op(block, arg_name) \ for arg_name in in_op.input_arg_names] - return any(op is not None and op.type not in utils.fake_quantize_dequantize_types \ - for op in previous_ops ) - - def _calc_output_scale_hook(self, layer, input, output): - """ - Create the MovingAverageAbsMaxScale layer for the target layer if needed. - Execute MovingAverageAbsMaxScale layer to calculate the output scale. - """ - assert isinstance(output, (core.VarBase, framework.Variable)), \ - "Multiple outputs are not currently supported in ImperativeOutScale." - - fp_types = [core.VarDesc.VarType.FP32, core.VarDesc.VarType.FP64] - if output.dtype in fp_types: - if not hasattr(layer, "_out_scale"): - self._out_scale = quant_nn.MovingAverageAbsMaxScale( - layer, output.name, self._moving_rate, output.dtype) - # TODO (jc): consider the ops that have several outputs - self._out_scale(output) + return any(op is not None and op.type not in \ + utils.fake_quantize_dequantize_types for op in previous_ops) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index 3c4fb323bc..f6fef0689d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -507,59 +507,42 @@ class QuantizedNoweightLayer(layers.Layer): class MovingAverageAbsMaxScale(layers.Layer): - def __init__(self, layer=None, name=None, moving_rate=0.9, dtype='float32'): + def __init__(self, name=None, moving_rate=0.9, dtype='float32'): r""" - MovingAverageMaxScale layer is used to calculating the output quantization scale of Layer. - Its computational formula is described as below: + MovingAverageMaxScale layer is used to calculating the output quantization + scale of Layer. Its computational formula is described as below: :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)` :math:`Out = X` """ super(MovingAverageAbsMaxScale, self).__init__() self._moving_rate = moving_rate - self._dtype = dtype - self._layer = layer - if self._layer is None or not hasattr(self._layer, "_quant_out_scale"): - scale_prefix = '{}.scale'.format(name) if name else 'outscale.scale' - scale_name = unique_name.generate(scale_prefix) - scale_attr = ParamAttr( - name=scale_name, initializer=Constant(1), trainable=False) - self._scale = self.create_parameter( - shape=[1], attr=scale_attr, dtype=self._dtype) - self._scale.stop_gradient = True - if self._layer is not None: - setattr(self._layer, "_quant_out_scale", self._scale) - else: - self._scale = self._layer._quant_out_scale + scale_prefix = '{}.scale'.format(name) if name else 'outscale.scale' + scale_name = unique_name.generate(scale_prefix) + scale_attr = ParamAttr( + name=scale_name, initializer=Constant(1), trainable=False) + self._scale = self.create_parameter( + shape=[1], attr=scale_attr, dtype=dtype) + self._scale.stop_gradient = True - if self._layer is None or not hasattr(self._layer, "_quant_out_state"): - state_prefix = "{}.state".format(name) if name else 'outscale.state' - state_attr = ParamAttr( - name=unique_name.generate(state_prefix), - initializer=Constant(1), - trainable=False) - self._state = self.create_parameter( - shape=[1], attr=state_attr, dtype=self._dtype) - self._state.stop_gradient = True - if self._layer is not None: - setattr(self._layer, "_quant_out_state", self._state) - else: - self._state = self._layer._quant_out_state + state_prefix = "{}.state".format(name) if name else 'outscale.state' + state_attr = ParamAttr( + name=unique_name.generate(state_prefix), + initializer=Constant(1), + trainable=False) + self._state = self.create_parameter( + shape=[1], attr=state_attr, dtype=dtype) + self._state.stop_gradient = True - if self._layer is None or not hasattr(self._layer, "_quant_out_accum"): - accum_prefix = "{}.accum".format(name) if name else 'outscale.accum' - accum_attr = ParamAttr( - name=unique_name.generate(accum_prefix), - initializer=Constant(1), - trainable=False) - self._accum = self.create_parameter( - shape=[1], attr=accum_attr, dtype=self._dtype) - self._accum.stop_gradient = True - if self._layer is not None: - setattr(self._layer, "_quant_out_accum", self._accum) - else: - self._accum = self._layer._quant_out_accum + accum_prefix = "{}.accum".format(name) if name else 'outscale.accum' + accum_attr = ParamAttr( + name=unique_name.generate(accum_prefix), + initializer=Constant(1), + trainable=False) + self._accum = self.create_parameter( + shape=[1], attr=accum_attr, dtype=dtype) + self._accum.stop_gradient = True def forward(self, input): if in_dygraph_mode(): @@ -567,18 +550,30 @@ class MovingAverageAbsMaxScale(layers.Layer): not self.training) state = self._state if self.training else None accum = self._accum if self.training else None + quant_out = _varbase_creator( + type=input.type, + name="{}.tmp".format(input.name), + shape=input.shape, + dtype=input.dtype, + persistable=False) - self._scale, _, _ = core.ops.moving_average_abs_max_scale( - input, accum, state, self._scale, state, accum, *attrs) - return self._scale + out, _, _, _ = core.ops.moving_average_abs_max_scale( + input, accum, state, quant_out, self._scale, state, accum, + *attrs) + return out check_variable_and_dtype(input, 'input', ['float32', 'float64'], 'MovingAverageAbsMaxScale') attrs = {'moving_rate': self._moving_rate, 'is_test': not self.training} - inputs = {"X": [input]} - outputs = {"OutScale": [self._scale]} + quant_out = self._helper.create_variable( + name="{}.tmp".format(input.name), + dtype=input.dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + stop_gradient=False) + outputs = {"Out": [quant_out], "OutScale": [self._scale]} if self.training: inputs['InState'] = [self._state] @@ -592,4 +587,22 @@ class MovingAverageAbsMaxScale(layers.Layer): outputs=outputs, attrs=attrs) - return self._scale + return quant_out + + +class QuantizedOutputLayer(layers.Layer): + def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): + r""" + Add MovingAverageMaxScale layer to the behind of the input layer. + """ + super(QuantizedOutputLayer, self).__init__() + self._layer = layer + self._moving_average_abs_max_scale = \ + MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) + + def forward(self, input): + if isinstance(input, list): + assert len(input) == 1, \ + "The QuantizedOutputLayer should only have one input." + out = self._layer(input) + return self._moving_average_abs_max_scale(out) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index 090f6cda38..f45eb8c97f 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -13,22 +13,7 @@ # limitations under the License. import paddle - -op_real_in_out_name = { - "conv2d": [["Input", "Filter"], ["Output"]], - "depthwise_conv2d": [["Input", "Filter"], ["Output"]], - "pool2d": [["X"], ["Out"]], - "elementwise_add": [["X", "Y"], ["Out"]], - "softmax": [["X"], ["Out"]], - "relu": [["X"], ["Out"]], - "relu6": [["X"], ["Out"]], - "leaky_relu": [["X"], ["Out"]], - "prelu": [["X"], ["Out"]], - "tanh": [["X"], ["Out"]], - "batch_norm": [["X"], ["Y"]], - "sigmoid": [["X"], ["Out"]], - "swish": [["X"], ["Out"]], -} +import numpy as np quant_input_layers_map = { 'Conv2D': paddle.nn.Conv2D, @@ -85,3 +70,33 @@ weight_op_types = [ "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose", "depthwise_conv2d_transpose" ] + + +def load_variable_data(scope, var_name): + ''' + Load variable value from scope + ''' + var_node = scope.find_var(var_name) + assert var_node is not None, \ + "Can not find " + var_name + " in the scope." + return np.array(var_node.get_tensor()) + + +def find_previous_op(block, var_name): + """ + Find the previous op for the input variable. + """ + for op in block.ops: + if var_name in op.output_arg_names: + return op + + +def find_next_ops(block, var_name): + """ + Find all followed ops for the input variable. + """ + res_ops = [] + for op in block.ops: + if var_name in op.input_arg_names: + res_ops.append(op) + return res_ops diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py index 600174e503..8d6ce76ef0 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py @@ -478,30 +478,5 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase): self.assertTrue(op_count == 14) -class TestSaveQuantizedModel_Warning(unittest.TestCase): - def test_warning(self): - path = "./dynamic_outscale_infer_model_with_warnings/lenet" - imperative_out_scale = ImperativeQuantAware() - with fluid.dygraph.guard(): - lenet = ImperativeLenet() - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - imperative_out_scale.save_quantized_model( - layer=lenet, - path=path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - warning_message = "Warning: No Layer of the model while to be " \ - "saved contains the out_threshold attribute, so the " \ - "generated inference model would not contain the " \ - "out_threshold." - num = get_vaild_warning_num(warning_message, w) - assert num == 1 - - if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py index 01f0abe0f2..1d7bfc9f69 100644 --- a/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py +++ b/python/paddle/fluid/tests/unittests/test_fake_quantize_op.py @@ -166,12 +166,14 @@ class TestMovingAverageAbsMaxScaleOp(OpTest): accum[0] = 1 state = np.zeros(1).astype("float32") state[0] = 1 + x = np.random.random((8, 16, 7, 7)).astype("float32") self.inputs = { - 'X': np.random.random((8, 16, 7, 7)).astype("float32"), + 'X': x, 'InAccum': accum, 'InState': state, } + out = x out_accum = np.zeros(1).astype("float32") out_state = np.zeros(1).astype("float32") out_scale = np.zeros(1).astype("float32") @@ -180,6 +182,7 @@ class TestMovingAverageAbsMaxScaleOp(OpTest): out_state[0] = self.attrs['moving_rate'] * state[0] + 1 out_scale = out_accum / out_state self.outputs = { + 'Out': out, 'OutAccum': out_accum, 'OutState': out_state, 'OutScale': out_scale, -- GitLab