Unverified commit 341f68fe, authored by Chang Xu, committed by GitHub

Add LSQ/LSQ+ in QAT (#45652)

Parent 383dc908
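At a glance, the patch adds two new fake-quant layers, FakeQuantWeightLSQPlus and FakeQuantActLSQPlus, registers the corresponding quantize types ('lsq_weight' / 'channel_wise_lsq_weight' for weights, 'lsq_act' for activations), and covers them with a dygraph QAT unit test. A minimal usage sketch, mirroring the test added below (paddle.vision.models.LeNet is only a stand-in model, not part of this change):

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

paddle.disable_static()
model = paddle.vision.models.LeNet()  # any dygraph model with conv/linear layers
qat = ImperativeQuantAware(
    weight_quantize_type='channel_wise_lsq_weight',  # per-channel LSQ for weights
    activation_quantize_type='lsq_act')              # LSQ+ for activations
qat.quantize(model)  # wraps quantizable layers in place with the new fake-quant layers
# Train as usual: the step sizes (and the activation offset beta) are learned together with the weights.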
......@@ -324,16 +324,18 @@ class ImperativeQuantizeInputs(object):
"%s is unspported to be quantized." % layer
quantize_type = {
'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'
'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max',
'lsq_weight', 'channel_wise_lsq_weight'
}
act_quantize_type = {'moving_average_abs_max', 'lsq_act'}
assert weight_quantize_type != 'moving_average_abs_max' \
and weight_quantize_type in quantize_type, \
"Unsupported weight_quantize_type: %s. It can only " \
"be abs_max or channel_wise_abs_max." % weight_quantize_type
# TODO (jc): activation_quantize_type supports range_abs_max
assert activation_quantize_type == 'moving_average_abs_max', \
assert activation_quantize_type in act_quantize_type, \
"Unsupported activation_quantize_type: %s. It can " \
"only be moving_average_abs_max now." \
"only be moving_average_abs_max or lsq_act now." \
% activation_quantize_type
bits_check = lambda bits: isinstance(bits, int) \
......
......@@ -252,6 +252,7 @@ if(WIN32)
list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1)
list(REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_lsq)
endif()
if(LINUX AND WITH_MKLDNN)
......@@ -505,6 +506,7 @@ if(WIN32)
test_moving_average_abs_max_scale_op
test_imperative_qat_channelwise
test_imperative_qat
test_imperative_qat_lsq
test_imperative_out_scale
test_graph)
list(REMOVE_ITEM TEST_OPS ${SINGLE_CARD_TEST_OPS})
......@@ -544,6 +546,7 @@ set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_qat_fuse PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_qat_user_defined PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_qat_lsq PROPERTIES TIMEOUT 300)
if(LINUX AND WITH_MKLDNN)
set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import numpy as np
import random
import time
import tempfile
import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer, AdamOptimizer, MomentumOptimizer
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.dygraph.container import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import QuantizedConv2D, QuantizedConv2DTranspose
from paddle.fluid.framework import _test_eager_guard
from imperative_test_utils import fix_model_dict
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(__name__,
logging.INFO,
fmt='%(asctime)s-%(levelname)s: %(message)s')
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=False), BatchNorm2D(6), ReLU(),
MaxPool2D(kernel_size=2, stride=2),
Conv2D(in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr), BatchNorm2D(16), PReLU(),
MaxPool2D(kernel_size=2, stride=2))
self.fc = Sequential(
Linear(in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr), LeakyReLU(),
Linear(in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr), Sigmoid(),
Linear(in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr), Softmax())
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestImperativeQatLSQ(unittest.TestCase):
def set_vars(self):
self.weight_quantize_type = 'channel_wise_lsq_weight'
self.activation_quantize_type = 'lsq_act'
self.onnx_format = False
self.fuse_conv_bn = False
def func_qat(self):
self.set_vars()
imperative_qat = ImperativeQuantAware(
weight_quantize_type=self.weight_quantize_type,
activation_quantize_type=self.activation_quantize_type,
fuse_conv_bn=self.fuse_conv_bn)
seed = 100
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
paddle.disable_static()
lenet = ImperativeLenet()
lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet)
optimizer = MomentumOptimizer(learning_rate=0.1,
parameter_list=lenet.parameters(),
momentum=0.9)
train_reader = paddle.batch(paddle.dataset.mnist.train(),
batch_size=64,
drop_last=True)
test_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=32)
epoch_num = 2
for epoch in range(epoch_num):
lenet.train()
for batch_id, data in enumerate(train_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array([x[1] for x in data
]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc = fluid.layers.accuracy(out, label)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = paddle.mean(loss)
avg_loss.backward()
optimizer.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
_logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".
format(epoch, batch_id, avg_loss.numpy(), acc.numpy()))
lenet.eval()
eval_acc_top1_list = []
with paddle.no_grad():
for batch_id, data in enumerate(test_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array([x[1] for x in data
]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc_top1 = fluid.layers.accuracy(input=out,
label=label,
k=1)
acc_top5 = fluid.layers.accuracy(input=out,
label=label,
k=5)
if batch_id % 100 == 0:
eval_acc_top1_list.append(float(acc_top1.numpy()))
_logger.info(
"Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}"
.format(epoch, batch_id, acc_top1.numpy(),
acc_top5.numpy()))
# check eval acc
eval_acc_top1 = sum(eval_acc_top1_list) / len(eval_acc_top1_list)
print('eval_acc_top1', eval_acc_top1)
        self.assertTrue(eval_acc_top1 > 0.9,
                        msg="The test acc %f is less than 0.9." %
                        eval_acc_top1)
def test_qat(self):
self.func_qat()
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from paddle.framework import core
from paddle.fluid import dygraph_utils
from paddle.utils import unique_name
from paddle.framework import ParamAttr
from paddle.fluid.framework import _varbase_creator
from paddle.nn.initializer import Constant
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.nn import functional as F
import logging
from paddle.fluid.log_helper import get_logger
from paddle import in_dynamic_mode
from paddle.nn import Layer
from paddle.autograd import PyLayer
import math
import copy
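# Round half away from zero: sign(x) * floor(|x| + 0.5). Note that it shadows Python's builtin
# round within this module; it is used by the LSQ fake-quant functions below.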
def round(x):
sign = paddle.sign(x)
x = sign * paddle.floor(paddle.abs(x) + 0.5)
return x
class LsqFunc(PyLayer):
@staticmethod
def forward(ctx, weight, alpha, g, Qn, Qp, per_channel=False, quant_axis=0):
ctx.save_for_backward(weight, alpha)
ctx.other = g, Qn, Qp, per_channel, quant_axis
if per_channel:
sizes = weight.shape
weight = weight.reshape((weight.shape[quant_axis], -1))
weight = weight.transpose((1, 0))
alpha = paddle.broadcast_to(alpha, weight.shape)
quant_w = round(paddle.divide(weight, alpha)).clip(Qn, Qp)
quant_w = quant_w * alpha
quant_w = quant_w.transpose((1, 0))
quant_w = quant_w.reshape(sizes)
else:
quant_w = round(paddle.divide(weight, alpha)).clip(Qn, Qp)
quant_w = quant_w * alpha
return quant_w
@staticmethod
def backward(ctx, grad_weight):
weight, alpha = ctx.saved_tensor()
g, Qn, Qp, per_channel, quant_axis = ctx.other
if per_channel:
sizes = weight.shape
weight = weight.reshape((weight.shape[quant_axis], -1))
weight = weight.transpose((1, 0))
alpha = paddle.broadcast_to(alpha, weight.shape)
q_w = paddle.divide(weight, alpha)
q_w = q_w.transpose((1, 0))
q_w = q_w.reshape(sizes)
else:
q_w = paddle.divide(weight, alpha)
lower_flag = paddle.cast((q_w < Qn), 'float32')
upper_flag = paddle.cast((q_w > Qp), 'float32')
middle_flag = 1.0 - lower_flag - upper_flag
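        # LSQ straight-through gradients (Esser et al., "Learned Step Size Quantization"):
        # with q = w / alpha, d(w_hat)/d(alpha) is Qn below the range, Qp above it, and
        # round(q) - q inside it (all scaled by g); d(w_hat)/d(w) is 1 inside [Qn, Qp], 0 outside.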
if per_channel:
grad_alpha = ((lower_flag * Qn + upper_flag * Qp +
middle_flag * round(q_w) - middle_flag * q_w) *
grad_weight * g)
grad_alpha = grad_alpha.reshape(
(grad_alpha.shape[quant_axis], -1)).sum(axis=1)
else:
grad_alpha = ((lower_flag * Qn + upper_flag * Qp +
middle_flag * round(q_w) - middle_flag * q_w) *
grad_weight * g).sum().unsqueeze(axis=0)[0]
grad_weight = middle_flag * grad_weight
return grad_weight, grad_alpha
class LsqPlusActFunc(PyLayer):
@staticmethod
def forward(ctx, x, alpha, beta, g, Qn, Qp):
ctx.save_for_backward(x, alpha, beta)
ctx.other = g, Qn, Qp
quant_x = round(paddle.divide((x - beta), alpha)).clip(Qn, Qp)
return quant_x * alpha + beta
@staticmethod
def backward(ctx, grad_x):
x, alpha, beta = ctx.saved_tensor()
g, Qn, Qp = ctx.other
q_x = (x - beta) / alpha
lower_flag = paddle.cast((q_x < Qn), 'float32')
upper_flag = paddle.cast((q_x > Qp), 'float32')
middle_flag = 1.0 - lower_flag - upper_flag
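        # LSQ+ gradients: alpha follows the LSQ rule on q = (x - beta) / alpha, while beta only
        # receives gradient where the input is clipped (it shifts the saturated values), scaled by g.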
grad_alpha = ((lower_flag * Qn + upper_flag * Qp +
middle_flag * round(q_x) - middle_flag * q_x) * grad_x *
g).sum().unsqueeze(axis=0)[0]
grad_beta = ((lower_flag + upper_flag) * grad_x *
g).sum().unsqueeze(axis=0)[0]
grad_x = middle_flag * grad_x
return grad_x, grad_alpha, grad_beta
class FakeQuantActLSQPlus(Layer):
def __init__(self,
quant_bits,
all_postive=False,
symmetric=False,
batch_init=20,
dtype='float32',
name=None,
reduce_type=None):
super(FakeQuantActLSQPlus, self).__init__()
'''
Args:
            quant_bits(int): quantization bit number for activations.
all_postive(bool): whether unsigned or signed quantization, where True for unsigned quantization and False for signed quantization.
symmetric(bool): whether symmetric or asymmetric quantization.
batch_init(int): number of batches that collect Gaussian approximation for the weight distribution in each layer.
dtype(str): data type.
            name(str): the name of this layer.
reduce_type(str): the reduce type which is needed when parallel training.
'''
self.bits = quant_bits
self.all_positive = all_postive
self.symmetric = symmetric
self.batch_init = batch_init
self.name = name
self.reduce_type = reduce_type
if self.all_positive:
# unsigned activation
self.Qn = 0
self.Qp = 2**self.bits - 1
else:
# signed activation
self.Qn = -2**(self.bits - 1)
self.Qp = 2**(self.bits - 1) - 1
scale_prefix = "{}.scale".format(
name) if name else 'quant_dequant.scale'
self._scale_name = unique_name.generate(scale_prefix)
s_attr = ParamAttr(name=self._scale_name,
initializer=Constant(1.0),
trainable=True)
self.s = self.create_parameter(shape=[1], attr=s_attr, dtype='float32')
self.s.stop_gradient = False
if not self.symmetric:
beta_prefix = "{}.beta".format(
name) if name else 'quant_dequant.beta'
self._beta_name = unique_name.generate(beta_prefix)
beta_attr = ParamAttr(name=self._beta_name,
initializer=Constant(0.0),
trainable=True)
self.beta = self.create_parameter(shape=[1],
attr=beta_attr,
dtype='float32')
self.beta.stop_gradient = False
self.init_state = 0
def forward(self, activation):
if self.reduce_type == "max":
paddle.distributed.all_reduce(self.s,
op=paddle.distributed.ReduceOp.MAX)
if not self.symmetric and self.reduce_type == "max":
paddle.distributed.all_reduce(self.beta,
op=paddle.distributed.ReduceOp.MAX)
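        # Warm-up: for the first `batch_init` batches the scale (and offset) are calibrated from
        # per-batch min/max statistics of the activations; afterwards they are learned by gradient descent.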
if self.init_state == 0:
self.g = paddle.to_tensor(1.0 /
math.sqrt(activation.numel() * self.Qp))
min_a = paddle.min(activation.detach())
max_a = paddle.max(activation.detach())
self.s.set_value((max_a - min_a) / (self.Qp - self.Qn))
if not self.symmetric:
self.beta.set_value(min_a - self.s * self.Qn)
self.init_state += 1
elif self.init_state < self.batch_init:
min_a = paddle.min(activation.detach())
max_a = paddle.max(activation.detach())
self.s.set_value(self.s * 0.9 + 0.1 * (max_a - min_a) /
(self.Qp - self.Qn))
if not self.symmetric:
                self.beta.set_value(self.beta * 0.9 + 0.1 *
                                    (min_a - self.s * self.Qn))
self.init_state += 1
else:
self.init_state += 1
activation.stop_gradient = False
if not self.symmetric:
q_a = LsqPlusActFunc.apply(activation, self.s, self.beta, self.g,
self.Qn, self.Qp)
else:
q_a = LsqFunc.apply(activation,
self.s,
self.g,
self.Qn,
self.Qp,
per_channel=False)
return q_a
class FakeQuantWeightLSQPlus(Layer):
def __init__(self,
quant_bits,
all_postive=False,
per_channel=False,
batch_init=20,
channel_num=None,
quant_linear=False,
dtype='float32',
name=None,
reduce_type=None):
super(FakeQuantWeightLSQPlus, self).__init__()
'''
Args:
quant_bits(int): quantization bit number for weights.
all_postive(bool): whether unsigned or signed quantization, where True for unsigned quantization and False for signed quantization.
            per_channel(bool): whether layer-wise or channel-wise quantization, where True for channel-wise quantization and False for layer-wise quantization.
batch_init(int): number of batches that collect Gaussian approximation for the weight distribution in each layer.
channel_num(int): the channel number of the weight which is needed when per_channel is True.
quant_linear(bool): whether the weight is from Linear.
dtype(str): data type.
name(str): the name of the weight.
reduce_type(str): the reduce type which is needed when parallel training.
'''
self.bits = quant_bits
self.all_positive = all_postive
self.per_channel = per_channel
self.quant_linear = quant_linear
self.batch_init = batch_init
self.name = name
self.quant_axis = 1 if quant_linear else 0
self.collect_axis = 0 if quant_linear else 1
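        # Linear weights are laid out as (in_features, out_features): quantize along axis 1 and
        # collect per-channel statistics along axis 0; conv weights use axis 0 and 1 respectively.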
self.reduce_type = reduce_type
if self.all_positive:
# unsigned weight
self.Qn = 0
self.Qp = 2**self.bits - 1
else:
# signed weight
self.Qn = -2**(self.bits - 1)
self.Qp = 2**(self.bits - 1) - 1
self.init_state = 0
scale_prefix = "{}.scale".format(
name) if name else 'quant_dequant.scale'
self._scale_name = unique_name.generate(scale_prefix)
s_attr = ParamAttr(name=self._scale_name,
initializer=Constant(1.0),
trainable=True)
self.s = self.create_parameter(shape=[channel_num],
attr=s_attr,
dtype=dtype)
self.s.stop_gradient = False
def forward(self, weight):
if self.reduce_type == "max":
paddle.distributed.all_reduce(self.s,
op=paddle.distributed.ReduceOp.MAX)
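        # Warm-up: the scale is initialized (and EMA-updated) from a 3-sigma range of the weight
        # distribution, divided by 2**bits - 1, before being learned directly by gradient descent.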
if self.init_state == 0:
self.g = paddle.to_tensor(1.0 / math.sqrt(weight.numel() * self.Qp))
self.div = 2**self.bits - 1
if self.per_channel:
weight_tmp = weight.detach().reshape((weight.shape[0], -1))
mean = paddle.mean(weight_tmp, axis=self.collect_axis)
std = paddle.std(weight_tmp, axis=self.collect_axis)
s = paddle.max(paddle.stack(
[paddle.abs(mean - 3 * std),
paddle.abs(mean + 3 * std)]),
axis=0)
self.s.set_value(s / self.div)
else:
mean = paddle.mean(weight.detach())
std = paddle.std(weight.detach())
self.s.set_value(
max([
paddle.abs(mean - 3 * std),
paddle.abs(mean + 3 * std)
]) / self.div)
self.init_state += 1
elif self.init_state < self.batch_init:
self.div = 2**self.bits - 1
if self.per_channel:
weight_tmp = weight.detach().reshape((weight.shape[0], -1))
mean = paddle.mean(weight_tmp, axis=self.collect_axis)
std = paddle.std(weight_tmp, axis=self.collect_axis)
s = paddle.max(paddle.stack(
[paddle.abs(mean - 3 * std),
paddle.abs(mean + 3 * std)]),
axis=0)
                self.s.set_value(self.s * 0.9 + 0.1 * s / self.div)
else:
mean = paddle.mean(weight.detach())
std = paddle.std(weight.detach())
self.s.set_value(self.s * 0.9 + 0.1 * max(
[paddle.abs(mean - 3 * std),
paddle.abs(mean + 3 * std)]) / self.div)
self.init_state += 1
elif self.init_state == self.batch_init:
self.init_state += 1
weight.stop_gradient = False
w_q = LsqFunc.apply(weight, self.s, self.g, self.Qn, self.Qp,
self.per_channel, self.quant_axis)
return w_q
......@@ -26,6 +26,7 @@ from paddle.fluid.log_helper import get_logger
from paddle import _C_ops, _legacy_C_ops
from paddle import in_dynamic_mode
from paddle.nn import Layer
from paddle.nn.quant.lsq import FakeQuantActLSQPlus, FakeQuantWeightLSQPlus
__all__ = [
'FakeQuantAbsMax',
......@@ -653,7 +654,8 @@ class QuantizedLinear(Layer):
dtype=self._dtype,
quant_on_weight=True,
channel_num=self.weight.shape[self._linear_quant_axis],
quant_axis=self._linear_quant_axis)
quant_axis=self._linear_quant_axis,
quant_linear=True)
if act_quant_layer is not None:
self._fake_quant_input = act_quant_layer()
......@@ -946,10 +948,29 @@ def _get_fake_quant_type(quant_type, **kwargs):
assert call_args["channel_num"] is not None, (
"You need to input channel_num"
"when you use channel_wise_abs_max strategy.")
elif quant_type == 'lsq_weight':
call_args["all_postive"] = kwargs.get("all_postive", False)
call_args["per_channel"] = False
call_args["channel_num"] = 1
call_args["quant_linear"] = kwargs.get("quant_linear", False)
elif quant_type == 'channel_wise_lsq_weight':
quant_type = 'lsq_weight'
call_args["all_postive"] = kwargs.get("all_postive", False)
call_args["per_channel"] = True
call_args["channel_num"] = kwargs.get("channel_num", None)
call_args["quant_linear"] = kwargs.get("quant_linear", False)
        assert call_args["channel_num"] is not None, (
            "You need to input channel_num "
            "when you use channel_wise_lsq_weight strategy.")
elif quant_type == 'lsq_act':
call_args["all_postive"] = kwargs.get("all_postive", False)
call_args["symmetric"] = kwargs.get("symmetric", True)
fake_quant_map = {
'abs_max': FakeQuantAbsMax,
'moving_average_abs_max': FakeQuantMovingAverageAbsMax,
'channel_wise_abs_max': FakeQuantChannelWiseAbsMax
'channel_wise_abs_max': FakeQuantChannelWiseAbsMax,
'lsq_weight': FakeQuantWeightLSQPlus,
'lsq_act': FakeQuantActLSQPlus
}
return fake_quant_map[quant_type](**call_args)