diff --git a/paddleslim/quant/advanced/layerwise_quant_error.py b/paddleslim/quant/advanced/layerwise_quant_error.py
index ce03d198cd09a943cedde26b0338ff50f65b8e12..ae230ceddfa30ae1653ca5f7b92e886c8919a3ed 100644
--- a/paddleslim/quant/advanced/layerwise_quant_error.py
+++ b/paddleslim/quant/advanced/layerwise_quant_error.py
@@ -54,6 +54,7 @@ class LayerWiseQuantError(nn.Layer):
             if type(cur_layer) == LayerWiseQuantError:
                 print(cur_name, cur_layer.losses.mean())
         '''
+        super(LayerWiseQuantError, self).__init__()
         self.layer = layer
         self.weight = layer.weight
         self.weight_bits = weight_bits
@@ -62,14 +63,13 @@ class LayerWiseQuantError(nn.Layer):
         self.act_method = act_quant_method
         self.loss_function = loss_function
         self.losses = []
+        self.loss = None

     def forward(self, input):
         act = input[0] if type(input) == tuple else input
         origin_out = paddle.matmul(act, self.weight)
         bnt = (1 << (self.weight_bits - 1)) - 1
-        quant_scale = compute_scales(
-            self.weight.cast('float32'),
-            method=self.weight_method).cast(self.weight.dtype)
+        quant_scale = compute_scales(self.weight, method=self.weight_method)
         quant_weight = paddle.clip(
             paddle.round(self.weight / quant_scale * bnt), -bnt - 1, bnt)
         quant_dequant_weight = quant_weight / bnt * quant_scale
@@ -80,6 +80,7 @@
             paddle.round(act / quant_scale * bnt), -bnt - 1, bnt)
         quant_dequant_act = quant_act / bnt * quant_scale
         quant_out = paddle.matmul(quant_dequant_act, quant_dequant_weight)
-        loss = self.loss_function(origin_out, quant_out)
+        loss = self.loss_function(origin_out, quant_out).cast('float32')
         self.losses.append(loss)
+        self.loss = paddle.to_tensor(self.losses, dtype='float32').mean()
         return self.layer(input)
diff --git a/paddleslim/quant/advanced/utils.py b/paddleslim/quant/advanced/utils.py
index 98f24ef124e10b345be6354f9c37fc352cfee6de..703fc5e1ce6b6e2f7419ab6c494c51d4efd5f52b 100644
--- a/paddleslim/quant/advanced/utils.py
+++ b/paddleslim/quant/advanced/utils.py
@@ -48,8 +48,10 @@ def compute_scales(x, method='abs_max'):
     elif method == 'abs_max_channel_wise':
         reduce_axis = tuple([i for i in range(len(x.shape)) if i != 1])
         quant_scale = paddle.max(paddle.abs(x), axis=reduce_axis)
-        quant_scale = paddle.where(quant_scale == np.float32(0.0),
-                                   np.float32(1e-8), quant_scale)
+        quant_scale = paddle.where(quant_scale == paddle.to_tensor(
+            0, dtype=x.dtype),
+                                   paddle.to_tensor(1e-8, dtype=x.dtype),
+                                   quant_scale)
     return quant_scale


diff --git a/paddleslim/quant/advanced/utils_layers.py b/paddleslim/quant/advanced/utils_layers.py
index 3b06a3ede83a3ef3892026d2ef87c0284e8c320c..166d938da2cadfa27e42ea5621b78f5d02f0ea16 100644
--- a/paddleslim/quant/advanced/utils_layers.py
+++ b/paddleslim/quant/advanced/utils_layers.py
@@ -26,7 +26,7 @@ class ShiftSmoothHelpLayer(nn.Layer):
         super(ShiftSmoothHelpLayer, self).__init__()
         self.weight = layer.weight
         shift_shape = self.weight.shape[0]
-        if hasattr(layer, "bias") or layer.bias is None:
+        if not hasattr(layer, "bias") or layer.bias is None:
             self.bias = paddle.create_parameter(
                 shape=[self.weight.shape[1]],
                 dtype=self.weight.dtype,
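
A minimal usage sketch of the patched `LayerWiseQuantError`, for context. The model, batch shapes, and reliance on the constructor's defaults are illustrative assumptions, not part of this change. The added `super().__init__()` call is what lets the wrapper register as a proper `nn.Layer` sublayer, and the new `loss` attribute exposes a running mean so callers no longer have to reduce the `losses` list themselves:

```python
import paddle
import paddle.nn as nn
from paddleslim.quant.advanced import LayerWiseQuantError

# A throwaway model standing in for a real network.
class TinyModel(nn.Layer):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(16, 32)
        self.fc2 = nn.Linear(32, 16)

    def forward(self, x):
        return self.fc2(self.fc1(x))

model = TinyModel()

# Wrap every Linear (collect first, then replace, so the model is not
# mutated while we iterate over its sublayers).
targets = [(name, sub) for name, sub in model.named_sublayers()
           if isinstance(sub, nn.Linear)]
for name, sub in targets:
    setattr(model, name, LayerWiseQuantError(sub))

# Each calibration forward appends one per-batch loss entry.
for _ in range(4):
    model(paddle.randn([8, 16]))

# After this patch, `loss` already holds the mean quantization error.
for name, sub in model.named_sublayers():
    if isinstance(sub, LayerWiseQuantError):
        print(name, sub.loss.item())
```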
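
The `compute_scales` change replaces numpy `float32` constants with tensors built in `x.dtype`, so the zero-scale guard stops assuming float32 weights. A small sketch of the patched branch (toy values, float32 for portability; the point is that both the comparison constant and the fallback now follow the weight dtype, so float16/bfloat16 scales are never silently promoted):

```python
import paddle

# Toy weight with an all-zero channel; the same code path works
# unchanged for float16/bfloat16 weights, which is the case the old
# np.float32 constants mishandled.
x = paddle.to_tensor([[0.0, 2.0], [0.0, -4.0]])

# Per-channel abs-max, as in the 'abs_max_channel_wise' branch.
reduce_axis = tuple(i for i in range(len(x.shape)) if i != 1)
quant_scale = paddle.max(paddle.abs(x), axis=reduce_axis)

# Both constants are built in x.dtype, so the comparison and the
# fallback stay in the weight's own precision.
quant_scale = paddle.where(
    quant_scale == paddle.to_tensor(0, dtype=x.dtype),
    paddle.to_tensor(1e-8, dtype=x.dtype), quant_scale)

print(quant_scale)  # the all-zero channel's scale becomes 1e-8
```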
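
The `utils_layers.py` fix corrects an inverted condition: the old `hasattr(layer, "bias")` test was true for every Paddle `Linear`, so even layers that already had a real bias took the create-a-zero-bias branch. A quick check of the corrected predicate (toy layers, illustrative only):

```python
import paddle.nn as nn

with_bias = nn.Linear(4, 4)
without_bias = nn.Linear(4, 4, bias_attr=False)  # layer.bias is None

for tag, layer in [("with_bias", with_bias),
                   ("without_bias", without_bias)]:
    # Patched predicate: create a fresh zero bias only when the wrapped
    # layer has no usable bias of its own.
    needs_zero_bias = not hasattr(layer, "bias") or layer.bias is None
    print(tag, needs_zero_bias)
# prints: with_bias False, without_bias True
```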