diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
index 98123a474c9bcca43b79b755b898622199fd5c64..8aaf327ce96750674111fde0d4a702aab74546e9 100644
--- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
+++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py
@@ -146,9 +146,10 @@ class Quant2Int8MkldnnPass(object):
                 input_name = op.input("X")[0]
                 scale_name = op.input("InScale")[0]
                 output_name = op.output("Out")[0]
-                # Gather new weights scale after folding batchnorm in convolution
+                # Gather new weight scales after folding batchnorm in convolution
                 scale = np.array(1.0 / self._load_param(
                     self._scope, scale_name)[0]).astype(np.float64)
+                scale[scale == np.Inf] = 0.0
                 lod_tensor = self._convert_scale2tensor(scale)
                 use_unsigned_int = False
                 _add_scale_for_vars([input_name, output_name], use_unsigned_int,
@@ -166,10 +167,11 @@ class Quant2Int8MkldnnPass(object):
                     self._weight_scales[input_name] = _max_range
                 else:
                     scale_name = op.input("Scales")[0]
-                    scale = np.array(
+                    scales = np.array(
                         self._s8_max * self._s8_max / self._load_param(
                             self._scope, scale_name)).astype(np.float64)
-                    self._weight_scales[input_name] = scale
+                    scales[scales == np.Inf] = 0.0
+                    self._weight_scales[input_name] = scales
 
         return graph
 
@@ -179,6 +181,7 @@
             attr_scale = op.op().attr("out_threshold")
             if attr_scale == 0.0: continue
             scale = np.array(1.0 / attr_scale).astype(np.float64)
+            scale[scale == np.Inf] = 0.0
             scale_lod_tensor = self._convert_scale2tensor(scale)
             use_unsigned_int = False
             for output_name in op.op().outputs():
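
All three hunks apply the same sanitization: a quantization scale stored as 0.0 turns into `np.Inf` once the pass takes its reciprocal (or divides `self._s8_max * self._s8_max` by it), and the new `scale[scale == np.Inf] = 0.0` lines reset those entries before the scales are propagated. Below is a minimal standalone sketch of the failure mode and the fix; the `loaded_scale` values are hypothetical, not taken from the patch:

```python
# Minimal sketch (assumed example, not part of the patch) of why the
# Inf mask is needed: a scale of 0.0 stored with the model becomes
# np.inf after the reciprocal and would poison later INT8 passes.
import numpy as np

loaded_scale = np.array([127.5, 0.0, 63.2])  # hypothetical InScale values

with np.errstate(divide="ignore"):  # silence the divide-by-zero warning
    scale = np.array(1.0 / loaded_scale).astype(np.float64)

# The same masking the patch adds (np.Inf is an alias of np.inf in the
# NumPy versions this code targets):
scale[scale == np.inf] = 0.0

print(scale)  # [0.00784314 0.         0.01582278]
```

Masking only positive infinity suffices here because quantization scales are non-negative, so the reciprocal can never be `-inf`.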