diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
index 9c028736d6826530994396ed5429b03c1d4118a9..22c30b3166cee72ff5e52f5d12e7876c540060f6 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py
@@ -121,7 +121,7 @@ class ImperativePTQ(object):
                 InputSpec or example Tensor. If None, all input variables of
                 the original Layer's forward method would be the inputs of
                 the saved model. Default None.
-            **configs (dict, optional): Other save configuration options for
+            **config (dict, optional): Other save configuration options for
                 compatibility. We do not recommend using these configurations,
                 they may be removed in the future. If not necessary, DO NOT use
                 them. Default None.
@@ -140,11 +140,15 @@ class ImperativePTQ(object):
         assert isinstance(
             model, paddle.nn.Layer
         ), "The model must be the instance of paddle.nn.Layer."
+        is_postprocess = config.get('postprocess', False)
+        config.pop('postprocess', None)
 
         # Convert and save dygraph quantized model
         self._convert(model)
 
         paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)
+        if not is_postprocess:
+            return
 
         # Load inference program
         is_dynamic_mode = False
@@ -272,10 +276,16 @@ class ImperativePTQ(object):
             output_names = layer_info.output_names
             output_thresholds = quant_config.out_act_quantizer.thresholds
             assert len(output_names) == 1
-            assert len(output_thresholds) == 1
-            save_name = output_names[0] + str(0) + "_threshold"
-            sub_layer._set_op_attrs({save_name: output_thresholds[0]})
-            sub_layer._set_op_attrs({"out_threshold": output_thresholds[0]})
+            if len(output_thresholds) == 1:
+                save_name = output_names[0] + str(0) + "_threshold"
+                sub_layer._set_op_attrs({save_name: output_thresholds[0]})
+                sub_layer._set_op_attrs({"out_threshold": output_thresholds[0]})
+            else:
+                _logger.warning(
+                    "The number of output_thresholds of {} needs to be 1, but received {}".format(
+                        output_names[0], len(output_thresholds)
+                    )
+                )
 
     def _wrap_simulated_layers(self, model):
         """
@@ -326,11 +336,13 @@ class ImperativePTQ(object):
             # save the input thresholds
             assert hasattr(quant_layer, "_fake_quant_input")
             assert hasattr(quant_layer._fake_quant_input, "_scale")
-            assert len(in_act_quantizer.thresholds) == 1
-            input_threshold = np.array(
-                [in_act_quantizer.thresholds[0]], dtype=np.float32
-            )
-            quant_layer._fake_quant_input._scale.set_value(input_threshold)
+            if len(in_act_quantizer.thresholds) == 1:
+                input_threshold = np.array(
+                    [in_act_quantizer.thresholds[0]], dtype=np.float32
+                )
+                quant_layer._fake_quant_input._scale.set_value(
+                    input_threshold
+                )
 
             assert hasattr(quant_layer, "_fake_quant_weight")
             assert hasattr(quant_layer._fake_quant_weight, "_scale")
diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
index 5bc7fc0c6b3512da1926347e407022666484f5db..e7b6a243abece6a85b212886771b7e244002c41f 100644
--- a/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
+++ b/python/paddle/fluid/contrib/slim/quantization/imperative/ptq_registry.py
@@ -41,6 +41,7 @@ PTQ_LAYERS_INFO = [
     LayerInfo(paddle.nn.ReLU, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.ReLU6, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Hardswish, ['X'], [], ['Out']),
+    LayerInfo(paddle.nn.Swish, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Sigmoid, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Softmax, ['X'], [], ['Out']),
     LayerInfo(paddle.nn.Tanh, ['X'], [], ['Out']),
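
For context, a minimal usage sketch of the changed save path follows. It is illustrative only: the example model (mobilenet_v1), the input shape, and the output path are assumptions, not part of this diff. The `postprocess` keyword is the new config key introduced above; when it is absent or False, `save_quantized_model` now returns right after `paddle.jit.save` instead of reloading the saved program to embed thresholds.

import paddle
from paddle.fluid.contrib.slim.quantization.imperative.ptq import ImperativePTQ
from paddle.vision.models import mobilenet_v1  # assumed example model

# Build the PTQ helper with its default quantizer configuration and
# wrap the float model with simulated-quantization observers.
ptq = ImperativePTQ()
fp32_model = mobilenet_v1(pretrained=False)
quant_model = ptq.quantize(fp32_model)

# ... run calibration batches through quant_model here ...

# `postprocess=True` opts back in to the pre-diff behavior: the saved
# program is reloaded and the collected thresholds are written into it.
ptq.save_quantized_model(
    model=quant_model,
    path="./quant_out/mobilenet",  # hypothetical output path
    input_spec=[
        paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype="float32")
    ],
    postprocess=True,
)

With this change, threshold post-processing becomes opt-in: callers that only need the jit-saved model avoid the extra program load/rewrite pass, and models whose quantizers collected an unexpected number of thresholds now log a warning instead of failing an assertion.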