Unverified commit ddc95a01, authored by cc, committed by GitHub

[quant] Add quant wrap for functional api and refine the qat (#33162)

* Add wrap for functional api
* Refine the wrapped api
* Add unit test for quant functional layers
* Update all unit tests for dygraph qat
Parent 92081e1d
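For context, a minimal usage sketch of what this commit enables; it is an illustration under assumed names (TinyNet, quanter), not code from the patch. A dygraph model uses the functional wrapper paddle.nn.quant.add instead of a bare `+`, so the imperative QAT passes below can wrap the op and record an output scale for it:

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

class TinyNet(paddle.nn.Layer):
    def __init__(self):
        super(TinyNet, self).__init__()
        self.conv = paddle.nn.Conv2D(1, 6, 3, padding=1)
        self.relu = paddle.nn.ReLU()
        # the functional add is wrapped as a layer so QAT can attach an output scale
        self.add = paddle.nn.quant.add()

    def forward(self, x):
        y = self.relu(self.conv(x))
        return self.add(y, paddle.to_tensor(0.0))

quanter = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
model = TinyNet()
quanter.quantize(model)  # swaps Conv2D/Linear and functional layers for quantized wrappers
# after training, export with quanter.save_quantized_model(layer=model, path=..., input_spec=[...])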
@@ -251,24 +251,25 @@ class ImperativeQuantizeInputs(object):
        super(ImperativeQuantizeInputs, self).__init__()

        self._quantizable_layer_type = tuple(
-            utils.quant_input_layers_map[layer]
-            if layer in utils.quant_input_layers_map else layer
+            utils.layer_name_map[layer]
+            if layer in utils.layer_name_map else layer
            for layer in quantizable_layer_type)
        for layer in self._quantizable_layer_type:
-            assert not isinstance(layer, str), \
+            assert not isinstance(layer, str) \
+                and layer in utils.fake_quant_input_layers, \
                "%s is unspported to be quantized." % layer

        quantize_type = {
            'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'
        }
-        assert weight_quantize_type in quantize_type, \
+        assert weight_quantize_type != 'moving_average_abs_max' \
+            and weight_quantize_type in quantize_type, \
            "Unsupported weight_quantize_type: %s. It can only " \
-            "be abs_max or moving_average_abs_max or " \
-            "channel_wise_abs_max." % weight_quantize_type
-        assert activation_quantize_type != 'channel_wise_abs_max' \
-            and activation_quantize_type in quantize_type, \
+            "be abs_max or channel_wise_abs_max." % weight_quantize_type
+        # TODO (jc): activation_quantize_type supports range_abs_max
+        assert activation_quantize_type == 'moving_average_abs_max', \
            "Unsupported activation_quantize_type: %s. It can " \
-            "only be abs_max or moving_average_abs_max now." \
+            "only be moving_average_abs_max now." \
            % activation_quantize_type

        bits_check = lambda bits: isinstance(bits, int) \
@@ -305,30 +306,22 @@ class ImperativeQuantizeInputs(object):
        assert isinstance(model, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

-        for name, layer in model.named_sublayers():
-            if not isinstance(layer, self._quantizable_layer_type) \
-                    or (hasattr(layer, "skip_quant") \
-                        and layer.skip_quant == True):
+        for name, cur_layer in model.named_sublayers():
+            if not isinstance(cur_layer, self._quantizable_layer_type) \
+                    or (hasattr(cur_layer, "skip_quant") \
+                        and cur_layer.skip_quant == True):
                continue

-            # TODO(jc): optimize this module
-            last_idx = 0
-            idx = 0
-            obj = model
-            while idx < len(name):
-                if (name[idx] == '.'):
-                    if hasattr(obj, name[last_idx:idx]):
-                        obj = getattr(obj, name[last_idx:idx])
-                        last_idx = idx + 1
-                idx += 1
-            target = name[last_idx:idx]
-
-            quant_layer = self._get_input_quantized_layer(layer)
-            setattr(obj, target, quant_layer)
+            parent_layer, sub_name = \
+                utils.find_parent_layer_and_sub_name(model, name)
+
+            cur_quant_layer = self._get_input_quantized_layer(cur_layer)
+            setattr(parent_layer, sub_name, cur_quant_layer)

    def _get_input_quantized_layer(self, layer):
        quant_layer_name = None
-        for key, value in utils.quant_input_layers_map.items():
+        for key, value in utils.layer_name_map.items():
            if isinstance(layer, value):
                quant_layer_name = 'Quantized' + key
                break
@@ -336,10 +329,6 @@ class ImperativeQuantizeInputs(object):
            "The layer %s is unsupported to be quantized." \
            % layer.full_name()

-        layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear']
-        if quant_layer_name not in layer_with_weight:
-            quant_layer_name = 'QuantizedNoweightLayer'
-
        return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs)
@@ -374,25 +363,21 @@ class ImperativeQuantizeOutputs(object):
        assert isinstance(model, dygraph.Layer), \
            "The model must be the instance of dygraph.Layer."

-        for name, layer in model.named_sublayers():
-            if not self._is_target_layer(layer):
+        for cur_name, cur_layer in model.named_sublayers():
+            if not self._is_target_layer(cur_layer):
                continue

-            # TODO(jc): optimize this module
-            last_idx = 0
-            idx = 0
-            obj = model
-            while idx < len(name):
-                if (name[idx] == '.'):
-                    if hasattr(obj, name[last_idx:idx]):
-                        obj = getattr(obj, name[last_idx:idx])
-                        last_idx = idx + 1
-                idx += 1
-            target = name[last_idx:idx]
-
-            quant_layer = quant_nn.__dict__["QuantizedOutputLayer"](
-                layer, self._moving_rate)
-            setattr(obj, target, quant_layer)
+            parent_layer, sub_name = \
+                utils.find_parent_layer_and_sub_name(model, cur_name)
+
+            if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)):
+                cur_quant_layer = quant_nn.FakeQuantMAOutputScaleLayer(
+                    cur_layer, self._moving_rate)
+            else:
+                cur_quant_layer = quant_nn.MAOutputScaleLayer(cur_layer,
+                                                              self._moving_rate)
+
+            setattr(parent_layer, sub_name, cur_quant_layer)

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """
@@ -468,9 +453,18 @@ class ImperativeQuantizeOutputs(object):
        """
        Whether the layer needs to calculate output scales.
        """
-        return isinstance(layer, utils.quant_output_layers) \
-            or ('quantized' in layer.full_name() and \
-                'quantized_noweight' not in layer.full_name())
+        flag = False
+        if isinstance(layer, dygraph.Layer):
+            # exclude fake_quant ops in quant_nn file
+            if utils.is_leaf_layer(layer) and \
+                not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
+                flag = True
+            # consider QuantizedConv2D and QuantizedLinear ops
+            if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
+                flag = True
+            if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
+                flag = True
+        return flag

    def _save_output_scale(self, program, scope):
        """
@@ -514,4 +508,4 @@ class ImperativeQuantizeOutputs(object):
        previous_ops = [utils.find_previous_op(block, arg_name) \
                        for arg_name in in_op.input_arg_names]
        return any(op is not None and op.type not in \
-            utils.fake_quantize_dequantize_types for op in previous_ops)
+            utils.fake_quantize_dequantize_op_types for op in previous_ops)
@@ -22,17 +22,28 @@ from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.initializer import Constant
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.nn import functional as F
+import logging
+from paddle.fluid.log_helper import get_logger

__all__ = [
-    'FakeQuantMovingAverage', 'FakeQuantAbsMax',
-    'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear',
-    'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale'
+    'FakeQuantMovingAverageAbsMax',
+    'FakeQuantAbsMax',
+    'FakeQuantChannelWiseAbsMax',
+    'QuantizedConv2D',
+    'QuantizedLinear',
+    'QuantizedNoweightLayer',
+    'MovingAverageAbsMaxScale',
+    'MAOutputScaleLayer',
+    'FakeQuantMAOutputScaleLayer',
]

+_logger = get_logger(
+    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')

-class FakeQuantMovingAverage(layers.Layer):
+
+class FakeQuantMovingAverageAbsMax(layers.Layer):
    r"""
-    FakeQuantMovingAverage layer does the moving_average_abs_max quant and then dequant.
+    FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
    Its computational formula is described as below:

    :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
@@ -45,7 +56,7 @@ class FakeQuantMovingAverage(layers.Layer):
                 moving_rate=0.9,
                 quant_bits=8,
                 dtype='float32'):
-        super(FakeQuantMovingAverage, self).__init__()
+        super(FakeQuantMovingAverageAbsMax, self).__init__()
        self._moving_rate = moving_rate
        self._quant_bits = quant_bits
@@ -98,7 +109,7 @@ class FakeQuantMovingAverage(layers.Layer):
            return out

        check_variable_and_dtype(input, 'input', ['float32'],
-                                 "FakeQuantMovingAverage")
+                                 "FakeQuantMovingAverageAbsMax")
        attrs = {
            'moving_rate': self._moving_rate,
            'bit_length': self._quant_bits,
@@ -210,7 +221,7 @@ class FakeQuantAbsMax(layers.Layer):
        return quant_out


-class FakeChannelWiseQuantDequantAbsMax(layers.Layer):
+class FakeQuantChannelWiseAbsMax(layers.Layer):
    def __init__(self,
                 name=None,
                 channel_num=None,
@@ -219,7 +230,7 @@ class FakeChannelWiseQuantDequantAbsMax(layers.Layer):
                 dtype='float32',
                 quant_on_weight=False):
        assert quant_on_weight == True, "Channel_wise only can be used on weight quantization."
-        super(FakeChannelWiseQuantDequantAbsMax, self).__init__()
+        super(FakeQuantChannelWiseAbsMax, self).__init__()
        self._quant_bits = quant_bits
        self._quant_axis = quant_axis
        self._dtype = dtype
@@ -265,7 +276,7 @@ class FakeChannelWiseQuantDequantAbsMax(layers.Layer):
            return out

        check_variable_and_dtype(input, 'input', ['float32'],
-                                 "FakeChannelWiseQuantDequantAbsMax")
+                                 "FakeQuantChannelWiseAbsMax")
        attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis}
        inputs = {"X": [input]}
        quant_out = self._helper.create_variable(
@@ -313,8 +324,8 @@ def _get_fake_quant_type(quant_type, **kwargs):
            "when you use channel_wise_abs_max strategy.")
    fake_quant_map = {
        'abs_max': FakeQuantAbsMax,
-        'moving_average_abs_max': FakeQuantMovingAverage,
-        'channel_wise_abs_max': FakeChannelWiseQuantDequantAbsMax
+        'moving_average_abs_max': FakeQuantMovingAverageAbsMax,
+        'channel_wise_abs_max': FakeQuantChannelWiseAbsMax
    }

    return fake_quant_map[quant_type](**call_args)
@@ -498,12 +509,7 @@ class QuantizedNoweightLayer(layers.Layer):
            quant_on_weight=False)

    def forward(self, input):
-        quant_input = self._fake_quant_input(input)
-        # TODO (jc): support ops that have several inputs
-        if isinstance(input, list):
-            assert len(input) == 1, \
-                "The QuantizedNoweightLayer should only have one input."
-        return self._layer.forward(quant_input)
+        return self._layer.forward(self._fake_quant_input(input))


class MovingAverageAbsMaxScale(layers.Layer):
@@ -590,19 +596,56 @@ class MovingAverageAbsMaxScale(layers.Layer):
        return quant_out


-class QuantizedOutputLayer(layers.Layer):
-    def __init__(self, layer=None, moving_rate=0.9, dtype='float32'):
+class MAOutputScaleLayer(layers.Layer):
+    """
+    Calculate the scale (moving average abs max) for the output of the input layer.
+    Add MovingAverageMaxScale layer to the behind of the input layer.
+    """
+
+    def __init__(self, layer=None, moving_rate=0.9, name=None, dtype='float32'):
        r"""
-        Add MovingAverageMaxScale layer to the behind of the input layer.
+        Construct
        """
-        super(QuantizedOutputLayer, self).__init__()
+        super(MAOutputScaleLayer, self).__init__()
        self._layer = layer
-        self._moving_average_abs_max_scale = \
-            MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype)
+        if name is None:
+            name = layer.full_name()
+        self._ma_output_scale = \
+            MovingAverageAbsMaxScale(name, moving_rate, dtype)

-    def forward(self, input):
-        if isinstance(input, list):
-            assert len(input) == 1, \
-                "The QuantizedOutputLayer should only have one input."
-        out = self._layer(input)
-        return self._moving_average_abs_max_scale(out)
+    def forward(self, *inputs, **kwargs):
+        out = self._layer(*inputs, **kwargs)
+        # TODO (jc): support the ops of several outputs
+        if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1:
+            return out
+        else:
+            return self._ma_output_scale(out)
+
+
+class FakeQuantMAOutputScaleLayer(layers.Layer):
+    def __init__(self,
+                 layer,
+                 weight_bits=8,
+                 activation_bits=8,
+                 moving_rate=0.9,
+                 name=None,
+                 *args,
+                 **kwargs):
+        super(FakeQuantMAOutputScaleLayer, self).__init__()
+        self._layer = layer
+        self._fake_quant_output = _get_fake_quant_type(
+            'moving_average_abs_max',
+            name=layer.full_name() if name is None else name,
+            moving_rate=moving_rate,
+            quant_bits=activation_bits,
+            dtype=self._dtype,
+            quant_on_weight=False)
+
+    def forward(self, *inputs, **kwargs):
+        out = self._layer(*inputs, **kwargs)
+        # TODO (jc): support the ops of several outputs
+        if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1:
+            return out
+        else:
+            return self._fake_quant_output(out)
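As an aside (not part of the diff), the two wrappers introduced above are what ImperativeQuantizeOutputs attaches automatically via setattr; applied by hand they behave roughly like this sketch. The layer names are hypothetical and the module path is assumed from the quant_nn import used in the updated tests:

import paddle
from paddle.fluid.contrib.slim.quantization.imperative import quant_nn

linear = paddle.nn.Linear(16, 4)
# regular layers only get a moving-average abs-max scale recorded on their output
scaled_linear = quant_nn.MAOutputScaleLayer(linear, moving_rate=0.9)

q_add = paddle.nn.quant.add()
# functional layers (add/subtract/multiply/divide) get a fake quant-dequant on the output
fq_add = quant_nn.FakeQuantMAOutputScaleLayer(q_add, activation_bits=8, moving_rate=0.9)

x = paddle.rand([2, 16])
y = scaled_linear(x)                  # same value as linear(x); the scale state is updated
z = fq_add(y, paddle.zeros_like(y))   # output passes through fake quantize-dequantize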
@@ -13,9 +13,11 @@
# limitations under the License.

import paddle
+from paddle.fluid import dygraph
import numpy as np

+from . import quant_nn

-quant_input_layers_map = {
+layer_name_map = {
    'Conv2D': paddle.nn.Conv2D,
    'Linear': paddle.nn.Linear,
    'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D,
@@ -37,30 +39,38 @@ quant_input_layers_map = {
    'LayerNorm': paddle.nn.LayerNorm,
}

-fake_quantize_dequantize_types = [
-    "fake_quantize_dequantize_abs_max",
-    "fake_channel_wise_quantize_dequantize_abs_max",
-    "fake_quantize_dequantize_moving_average_abs_max"
-]
+# Apply fake quant for the inputs of these layers
+# TODO (jc): support paddle.nn.Conv2DTranspose
+fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear]

-quant_output_layers = (
-    paddle.nn.Conv2D, paddle.nn.Conv2DTranspose, paddle.nn.Linear,
-    paddle.nn.AdaptiveAvgPool2D, paddle.nn.AdaptiveMaxPool2D,
-    paddle.nn.AvgPool2D, paddle.nn.MaxPool2D, paddle.nn.BatchNorm,
-    paddle.nn.BatchNorm2D, paddle.nn.LayerNorm, paddle.nn.SyncBatchNorm,
-    paddle.nn.ELU, paddle.nn.GELU, paddle.nn.Hardshrink, paddle.nn.Hardsigmoid,
-    paddle.nn.Hardswish, paddle.nn.Hardtanh, paddle.nn.LeakyReLU,
-    paddle.nn.LogSigmoid, paddle.nn.LogSoftmax, paddle.nn.Maxout,
-    paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6, paddle.nn.SELU,
-    paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Softplus,
-    paddle.nn.Softshrink, paddle.nn.Softsign, paddle.nn.Swish, paddle.nn.Tanh,
-    paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample)
+# Apply fake quant for the output of these layers
+# TODO(jc): fix the problem of adding duplicate fake_quant ops
+# paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU, paddle.nn.LeakyReLU
+fake_quant_output_layers = [
+    paddle.nn.quant.add, paddle.nn.quant.subtract, paddle.nn.quant.multiply,
+    paddle.nn.quant.divide
+]
+
+fake_quant_leaf_layers = [
+    quant_nn.FakeQuantAbsMax,
+    quant_nn.FakeQuantChannelWiseAbsMax,
+    quant_nn.FakeQuantMovingAverageAbsMax,
+    quant_nn.MovingAverageAbsMaxScale,
+]
+
+fake_quant_wrap_layers = [quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear]

weight_op_types = [
    "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose",
    "depthwise_conv2d_transpose"
]

+fake_quantize_dequantize_op_types = [
+    "fake_quantize_dequantize_abs_max",
+    "fake_channel_wise_quantize_dequantize_abs_max",
+    "fake_quantize_dequantize_moving_average_abs_max"
+]


def load_variable_data(scope, var_name):
    '''
@@ -90,3 +100,36 @@ def find_next_ops(block, var_name):
        if var_name in op.input_arg_names:
            res_ops.append(op)
    return res_ops
def find_parent_layer_and_sub_name(model, name):
"""
Given the model and the name of a layer, find the parent layer and
the sub_name of the layer.
For example, if name is 'block_1/convbn_1/conv_1', the parent layer is
'block_1/convbn_1' and the sub_name is `conv_1`.
"""
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of paddle.nn.Layer."
assert len(name) > 0, "The input (name) should not be empty."
last_idx = 0
idx = 0
parent_layer = model
while idx < len(name):
if name[idx] == '.':
sub_name = name[last_idx:idx]
if hasattr(parent_layer, sub_name):
parent_layer = getattr(parent_layer, sub_name)
last_idx = idx + 1
idx += 1
sub_name = name[last_idx:idx]
return parent_layer, sub_name
def is_leaf_layer(layer):
"""
Whether the layer is leaf layer.
"""
return isinstance(layer, dygraph.Layer) \
and len(layer.sublayers()) == 0
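A short illustration of the two helpers above (invented layer names; the import path is assumed from the quant_nn import used in the tests):

import paddle
from paddle.fluid.contrib.slim.quantization.imperative import utils

class Block(paddle.nn.Layer):
    def __init__(self):
        super(Block, self).__init__()
        self.conv_1 = paddle.nn.Conv2D(3, 8, 3)

class Net(paddle.nn.Layer):
    def __init__(self):
        super(Net, self).__init__()
        self.block_1 = Block()

model = Net()
# model.named_sublayers() yields dotted names such as 'block_1' and 'block_1.conv_1'
parent, sub_name = utils.find_parent_layer_and_sub_name(model, 'block_1.conv_1')
assert parent is model.block_1 and sub_name == 'conv_1'
# the QAT passes then call setattr(parent, sub_name, quantized_wrapper)

assert utils.is_leaf_layer(model.block_1.conv_1)   # Conv2D has no sublayers
assert not utils.is_leaf_layer(model.block_1)      # container that holds conv_1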
@@ -270,12 +270,6 @@ list(REMOVE_ITEM TEST_OPS
#TODO(wanghaoshuang): Fix this unitest failed on GCC8.
LIST(REMOVE_ITEM TEST_OPS test_auto_pruning)
LIST(REMOVE_ITEM TEST_OPS test_filter_pruning)
-# only tests on singal GPU environment
-LIST(REMOVE_ITEM TEST_OPS test_imperative_qat_addquantdequant)
-py_test_modules(test_imperative_qat_addquantdequant MODULES test_imperative_qat_addquantdequant ENVS
-    CUDA_VISIBLE_DEVICES=0)

# fix
if(WIN32)
@@ -313,7 +307,6 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120)
set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120)
-set_tests_properties(test_imperative_qat_addquantdequant PROPERTIES TIMEOUT 120)
set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120)
if(LINUX AND WITH_MKLDNN)
    set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120)
...
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.dygraph.container import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def fix_model_dict(model):
fixed_state = {}
for name, param in model.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01,
size=np.product(p_shape)).reshape(p_shape).astype('float32')
fixed_state[name] = value
model.set_dict(fixed_state)
return model
def train_lenet(lenet, reader, optimizer):
loss_list = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
optimizer.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
loss_list.append(avg_loss.numpy()[0])
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
return loss_list
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=False),
BatchNorm2D(6),
ReLU(),
MaxPool2D(
kernel_size=2, stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
BatchNorm2D(16),
PReLU(),
MaxPool2D(
kernel_size=2, stride=2))
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
LeakyReLU(),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Sigmoid(),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
self.add = paddle.nn.quant.add()
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.add(x, paddle.to_tensor(0.0)) # For CI
x = self.fc(x)
return x
class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenetWithSkipQuant, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.conv2d_0 = Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
self.conv2d_0.skip_quant = True
self.batch_norm_0 = BatchNorm2D(6)
self.relu_0 = ReLU()
self.pool2d_0 = MaxPool2D(kernel_size=2, stride=2)
self.conv2d_1 = Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
self.conv2d_1.skip_quant = False
self.batch_norm_1 = BatchNorm2D(16)
self.relu6_0 = ReLU6()
self.pool2d_1 = MaxPool2D(kernel_size=2, stride=2)
self.linear_0 = Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
self.linear_0.skip_quant = True
self.leaky_relu_0 = LeakyReLU()
self.linear_1 = Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
self.linear_1.skip_quant = False
self.sigmoid_0 = Sigmoid()
self.linear_2 = Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
self.linear_2.skip_quant = False
self.softmax_0 = Softmax()
def forward(self, inputs):
x = self.conv2d_0(inputs)
x = self.batch_norm_0(x)
x = self.relu_0(x)
x = self.pool2d_0(x)
x = self.conv2d_1(x)
x = self.batch_norm_1(x)
x = self.relu6_0(x)
x = self.pool2d_1(x)
x = fluid.layers.flatten(x, 1)
x = self.linear_0(x)
x = self.leaky_relu_0(x)
x = self.linear_1(x)
x = self.sigmoid_0(x)
x = self.linear_2(x)
x = self.softmax_0(x)
return x
@@ -28,7 +28,6 @@ from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
-from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass, QuantizationTransformPass
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, PReLU
@@ -36,6 +35,8 @@ from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph import nn

+from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenet

paddle.enable_static()
os.environ["CPU_NUM"] = "1"
@@ -54,59 +55,6 @@ def get_vaild_warning_num(warning, w):
    return num
def StaticLenet(data, num_classes=10, classifier_activation='softmax'):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=False)
batch_norm1 = layers.batch_norm(conv1)
relu1 = layers.relu(batch_norm1)
pool1 = fluid.layers.pool2d(
relu1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
batch_norm2 = layers.batch_norm(conv2)
prelu1 = layers.prelu(batch_norm2, mode='all')
pool2 = fluid.layers.pool2d(
prelu1, pool_size=2, pool_type='max', pool_stride=2)
fc1 = fluid.layers.fc(input=pool2,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
leaky_relu1 = layers.leaky_relu(fc1, alpha=0.01)
fc2 = fluid.layers.fc(input=leaky_relu1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
sigmoid1 = layers.sigmoid(fc2)
fc3 = fluid.layers.fc(input=sigmoid1,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
softmax1 = layers.softmax(fc3, use_cudnn=True)
return softmax1
class ImperativeLenet(fluid.dygraph.Layer):
    def __init__(self, num_classes=10):
        super(ImperativeLenet, self).__init__()
@@ -175,38 +123,11 @@ class ImperativeLenet(fluid.dygraph.Layer):
class TestImperativeOutSclae(unittest.TestCase):
    def test_out_scale_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'abs_max'
activation_quantize_type = 'moving_average_abs_max'
param_init_map = {}
        seed = 1000
        lr = 0.001
-        dynamic_out_scale_list = []
-        static_out_scale_list = []

-        # imperative train
-        _logger.info(
-            "--------------------------dynamic graph qat--------------------------"
-        )
+        weight_quantize_type = 'abs_max'
+        activation_quantize_type = 'moving_average_abs_max'
        imperative_out_scale = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quantize_type)
@@ -215,207 +136,46 @@ class TestImperativeOutSclae(unittest.TestCase):
            np.random.seed(seed)
            fluid.default_main_program().random_seed = seed
            fluid.default_startup_program().random_seed = seed

            lenet = ImperativeLenet()
-            fixed_state = {}
+            lenet = fix_model_dict(lenet)
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
            imperative_out_scale.quantize(lenet)

+            reader = paddle.batch(
+                paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
            adam = AdamOptimizer(
                learning_rate=lr, parameter_list=lenet.parameters())

-            dynamic_loss_rec = []
+            loss_list = train_lenet(lenet, reader, adam)
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
            lenet.eval()

            param_save_path = "test_save_quantized_model/lenet.pdparams"
            save_dict = lenet.state_dict()
            paddle.save(save_dict, param_save_path)

-            path = "./dynamic_outscale_infer_model/lenet"
-            dynamic_save_dir = "./dynamic_outscale_infer_model"
+            save_path = "./dynamic_outscale_infer_model/lenet"
            imperative_out_scale.save_quantized_model(
                layer=lenet,
-                path=path,
+                path=save_path,
                input_spec=[
                    paddle.static.InputSpec(
                        shape=[None, 1, 28, 28], dtype='float32')
                ])

-        _logger.info(
-            "--------------------------static graph qat--------------------------"
-        )
-        static_loss_rec = []
+        for i in range(len(loss_list) - 1):
+            self.assertTrue(
+                loss_list[i] > loss_list[i + 1],
+                msg='Failed to do the imperative qat.')
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
if "batch_norm" in param.name:
param_name = param.name.replace("norm", "norm2d")
elif 'prelu' in param.name:
param_name = param.name.replace("prelu", 'p_re_lu')
else:
param_name = param.name
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param_name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quantize_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
outscale_pass = OutScaleForTrainingPass(scope=scope, place=place)
outscale_pass.apply(main_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
scale_inference_pass = OutScaleForInferencePass(scope=scope)
scale_inference_pass.apply(infer_graph)
save_program = infer_graph.to_program()
static_save_dir = "./static_outscale_infer_model"
with fluid.scope_guard(scope):
fluid.io.save_inference_model(
dirname=static_save_dir,
feeded_var_names=[infer_img.name],
target_vars=[infer_pre],
executor=exe,
main_program=save_program,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX)
rtol = 1e-05
atol = 1e-08
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
# load dynamic model
[dynamic_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=dynamic_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
# load static model
[static_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=static_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
dynamic_ops = dynamic_inference_program.global_block().ops
static_ops = static_inference_program.global_block().ops
for op in dynamic_ops[:]:
if op.type == "flatten2" or 'fake' in op.type:
dynamic_ops.remove(op)
for op in static_ops[:]:
if 'fake' in op.type:
static_ops.remove(op)
op_count = 0
for i in range(len(dynamic_ops)):
if dynamic_ops[i].has_attr("out_threshold"):
op_count += 1
self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
if dynamic_ops[i].attr("out_threshold") != static_ops[i].attr(
"out_threshold"):
_logger.info(dynamic_ops[i].attr("out_threshold"))
_logger.info(static_ops[i].attr("out_threshold"))
self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
static_ops[i].attr("out_threshold"))
_logger.info("op_cout: {}".format(op_count))
self.assertTrue(op_count == 14)
class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
    def test_save_quantized_model(self):
-        weight_quantize_type = 'abs_max'
-        activation_quantize_type = 'moving_average_abs_max'
+        lr = 0.001
        load_param_path = "test_save_quantized_model/lenet.pdparams"
-        path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"
-        dynamic_model_save_dir = "./dynamic_outscale_infer_model_from_checkpoint"
-        static_model_save_dir = "./static_outscale_infer_model"
+        save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet"

+        weight_quantize_type = 'abs_max'
+        activation_quantize_type = 'moving_average_abs_max'
        imperative_out_scale = ImperativeQuantAware(
            weight_quantize_type=weight_quantize_type,
            activation_quantize_type=activation_quantize_type)
@@ -426,56 +186,25 @@ class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase):
            imperative_out_scale.quantize(lenet)
            lenet.set_dict(load_dict)

+            reader = paddle.batch(
+                paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
+            adam = AdamOptimizer(
+                learning_rate=lr, parameter_list=lenet.parameters())
+            loss_list = train_lenet(lenet, reader, adam)
+            lenet.eval()

            imperative_out_scale.save_quantized_model(
                layer=lenet,
-                path=path,
+                path=save_path,
                input_spec=[
                    paddle.static.InputSpec(
                        shape=[None, 1, 28, 28], dtype='float32')
                ])

-        if core.is_compiled_with_cuda():
-            place = core.CUDAPlace(0)
-        else:
-            place = core.CPUPlace()
+        for i in range(len(loss_list) - 1):
+            self.assertTrue(
+                loss_list[i] > loss_list[i + 1],
+                msg='Failed to do the imperative qat.')
exe = fluid.Executor(place)
# load dynamic model
[dynamic_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=dynamic_model_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
# load static model
[static_inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=static_model_save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
dynamic_ops = dynamic_inference_program.global_block().ops
static_ops = static_inference_program.global_block().ops
for op in dynamic_ops[:]:
if op.type == "flatten2" or 'fake' in op.type:
dynamic_ops.remove(op)
for op in static_ops[:]:
if 'fake' in op.type:
static_ops.remove(op)
op_count = 0
for i in range(len(dynamic_ops)):
if dynamic_ops[i].has_attr("out_threshold"):
op_count += 1
self.assertTrue(dynamic_ops[i].type == static_ops[i].type)
self.assertTrue(dynamic_ops[i].attr("out_threshold") ==
static_ops[i].attr("out_threshold"))
_logger.info("op_cout: {}".format(op_count))
self.assertTrue(op_count == 14)
if __name__ == '__main__':
...
@@ -21,20 +21,20 @@ import shutil
import time
import unittest
import logging

import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
-from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
-from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.dygraph.container import Sequential
from paddle.nn import Linear, Conv2D, Softmax
-from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedConv2D

+from imperative_test_utils import fix_model_dict, ImperativeLenet

paddle.enable_static()
os.environ["CPU_NUM"] = "1"
@@ -45,115 +45,6 @@ _logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def StaticLenet(data, num_classes=10):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
pool1 = fluid.layers.pool2d(
conv1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
pool2 = fluid.layers.pool2d(
conv2, pool_size=2, pool_type='max', pool_stride=2)
fc1 = fluid.layers.fc(input=pool2,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
fc2 = fluid.layers.fc(input=fc1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
fc3 = fluid.layers.fc(input=fc2,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
fc4 = fluid.layers.softmax(fc3, use_cudnn=True)
return fc4
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2))
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestImperativeQat(unittest.TestCase):
    """
    QAT = quantization-aware training
@@ -164,19 +55,26 @@ class TestImperativeQat(unittest.TestCase):
        timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
        cls.root_path = os.path.join(os.getcwd(), "imperative_qat_" + timestamp)
        cls.save_path = os.path.join(cls.root_path, "lenet")
-        cls.dynamic_root_path = os.path.join(os.getcwd(),
-                                             "dynamic_mnist_" + timestamp)
-        cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model")

    @classmethod
    def tearDownClass(cls):
-        shutil.rmtree(cls.root_path)
-        shutil.rmtree(cls.dynamic_root_path)
+        try:
+            shutil.rmtree(cls.root_path)
+        except Exception as e:
+            print("Failed to delete {} due to {}".format(cls.root_path, str(e)))

+    def set_vars(self):
+        self.weight_quantize_type = None
+        self.activation_quantize_type = None
+        print('weight_quantize_type', self.weight_quantize_type)

-    def test_qat_save(self):
+    def run_qat_save(self):
+        self.set_vars()

        imperative_qat = ImperativeQuantAware(
-            weight_quantize_type='abs_max',
-            activation_quantize_type='moving_average_abs_max')
+            weight_quantize_type=self.weight_quantize_type,
+            activation_quantize_type=self.activation_quantize_type)

        with fluid.dygraph.guard():
            # For CI coverage
            conv1 = Conv2D(
@@ -190,10 +88,17 @@ class TestImperativeQat(unittest.TestCase):
            data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
            quant_conv1(fluid.dygraph.to_variable(data))

+            seed = 1
+            np.random.seed(seed)
+            fluid.default_main_program().random_seed = seed
+            fluid.default_startup_program().random_seed = seed

            lenet = ImperativeLenet()
+            lenet = fix_model_dict(lenet)
            imperative_qat.quantize(lenet)
            adam = AdamOptimizer(
                learning_rate=0.001, parameter_list=lenet.parameters())

            train_reader = paddle.batch(
                paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
            test_reader = paddle.batch(
@@ -226,6 +131,7 @@ class TestImperativeQat(unittest.TestCase):
                    break

            lenet.eval()
+            eval_acc_top1_list = []
            for batch_id, data in enumerate(test_reader()):
                x_data = np.array([x[0].reshape(1, 28, 28)
                                   for x in data]).astype('float32')
@@ -242,14 +148,19 @@ class TestImperativeQat(unittest.TestCase):
                    input=out, label=label, k=5)

                if batch_id % 100 == 0:
+                    eval_acc_top1_list.append(float(acc_top1.numpy()))
                    _logger.info(
                        "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
                        format(epoch, batch_id,
                               acc_top1.numpy(), acc_top5.numpy()))

-            # save weights
-            model_dict = lenet.state_dict()
-            fluid.save_dygraph(model_dict, "save_temp")
+            # check eval acc
+            eval_acc_top1 = sum(eval_acc_top1_list) / len(
+                eval_acc_top1_list)
+            print('eval_acc_top1', eval_acc_top1)
+            self.assertTrue(
+                eval_acc_top1 > 0.9,
+                msg="The test acc {%f} is less than 0.9." % eval_acc_top1)

            # test the correctness of `paddle.jit.save`
            data = next(test_reader())
@@ -260,13 +171,14 @@ class TestImperativeQat(unittest.TestCase):
            before_save = lenet(test_img)

            # save inference quantized model
-            paddle.jit.save(
+            imperative_qat.save_quantized_model(
                layer=lenet,
-                path=TestImperativeQat.save_path,
+                path=self.save_path,
                input_spec=[
                    paddle.static.InputSpec(
                        shape=[None, 1, 28, 28], dtype='float32')
                ])
+            print('Quantized model saved in {%s}' % self.save_path)

            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
@@ -275,183 +187,27 @@ class TestImperativeQat(unittest.TestCase):
            exe = fluid.Executor(place)
            [inference_program, feed_target_names,
             fetch_targets] = fluid.io.load_inference_model(
-                dirname=TestImperativeQat.root_path,
+                dirname=self.root_path,
                executor=exe,
                model_filename="lenet" + INFER_MODEL_SUFFIX,
                params_filename="lenet" + INFER_PARAMS_SUFFIX)
            after_save, = exe.run(inference_program,
                                  feed={feed_target_names[0]: test_data},
                                  fetch_list=fetch_targets)
+            # check
            self.assertTrue(
                np.allclose(after_save, before_save.numpy()),
                msg='Failed to save the inference quantized model.')

-    def test_qat_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'abs_max'
activation_quant_type = 'moving_average_abs_max'
param_init_map = {}
seed = 1000
lr = 0.01
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
imperative_qat = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quant_type)
-        with fluid.dygraph.guard():
-            np.random.seed(seed)
-            fluid.default_main_program().random_seed = seed
-            fluid.default_startup_program().random_seed = seed
-            lenet = ImperativeLenet()
-            fixed_state = {}
-            for name, param in lenet.named_parameters():
-                p_shape = param.numpy().shape
-                p_value = param.numpy()
-                if name.endswith("bias"):
-                    value = np.zeros_like(p_value).astype('float32')
-                else:
-                    value = np.random.normal(
-                        loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
-                            p_shape).astype('float32')
-                fixed_state[name] = value
-                param_init_map[param.name] = value
-            lenet.set_dict(fixed_state)
-            imperative_qat.quantize(lenet)
-            adam = AdamOptimizer(
-                learning_rate=lr, parameter_list=lenet.parameters())
+class TestImperativeQatAbsMax(TestImperativeQat):
+    def set_vars(self):
+        self.weight_quantize_type = 'abs_max'
+        self.activation_quantize_type = 'moving_average_abs_max'
+        print('weight_quantize_type', self.weight_quantize_type)
+
+    def test_qat(self):
+        self.run_qat_save()
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
paddle.jit.save(
layer=lenet,
path=TestImperativeQat.dynamic_save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
# static graph train
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param.name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
save_program = infer_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model("./static_mnist", [infer_img.name],
[infer_pre], exe, save_program)
rtol = 1e-05
atol = 1e-08
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
if __name__ == '__main__':
...
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import numpy as np
import random
import shutil
import time
import unittest
import logging
import paddle
import six
import paddle.fluid as fluid
from paddle.nn import functional
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm
from paddle.fluid.layers import nn
from paddle.fluid import core
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware, QuantizationTransformPass, AddQuantDequantPass
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.nn import Pool2D
from paddle.nn.layer.activation import ReLU, LeakyReLU, ReLU6, Tanh, Swish
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
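# StaticLenet and ImperativeLenet below describe the same network and share
# parameter names through ParamAttr, so test_qat_acc can start both from
# identical weights, quantize the dygraph model with ImperativeQuantAware and
# the static graph with QuantizationTransformPass + AddQuantDequantPass, and
# then compare the two loss curves.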
def StaticLenet(data, num_classes=10):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
conv1 = fluid.layers.leaky_relu(conv1, alpha=0.02)
pool1 = fluid.layers.pool2d(
conv1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
pool2 = fluid.layers.pool2d(
conv2, pool_size=2, pool_type='max', pool_stride=2)
pool2 = fluid.layers.relu(pool2)
pool2 = fluid.layers.swish(pool2)
conv3 = fluid.layers.conv2d(
pool2,
num_filters=16,
filter_size=1,
stride=1,
padding=0,
param_attr=conv2d_w3_attr,
bias_attr=conv2d_b3_attr)
conv3 = fluid.layers.relu6(conv3)
conv3 = paddle.tensor.math.tanh(conv3)
fc1 = fluid.layers.fc(input=conv3,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
fc2 = fluid.layers.fc(input=fc1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
fc3 = fluid.layers.fc(input=fc2,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
fc3 = fluid.layers.softmax(fc3, use_cudnn=True)
return fc3
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr),
LeakyReLU(negative_slope=0.02),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
ReLU(),
Swish(),
Conv2D(
in_channels=16,
out_channels=16,
kernel_size=1,
stride=1,
padding=0,
weight_attr=conv2d_w3_attr,
bias_attr=conv2d_b3_attr),
ReLU6(),
Tanh())
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
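# Note: besides Conv2D and Linear, the tests below also list the activation
# layers (ReLU, LeakyReLU, ReLU6, Tanh, Swish) in quantizable_layer_type,
# mirroring the op list given to AddQuantDequantPass on the static-graph side.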
class TestImperativeAddQuantDequant(unittest.TestCase):
@classmethod
def setUpClass(cls):
timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
cls.root_path = os.path.join(os.getcwd(),
"imperative_qat_aqd_" + timestamp)
cls.save_path = os.path.join(cls.root_path, "lenet")
cls.dynamic_root_path = os.path.join(os.getcwd(),
"dynamic_mnist_aqd_" + timestamp)
cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model")
@classmethod
def tearDownClass(cls):
shutil.rmtree(cls.root_path)
shutil.rmtree(cls.dynamic_root_path)
def test_qat_save(self):
imperative_qat = ImperativeQuantAware(
weight_quantize_type='abs_max',
activation_quantize_type='moving_average_abs_max',
quantizable_layer_type=[
'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
'Swish'
])
with fluid.dygraph.guard():
lenet = ImperativeLenet()
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=0.001, parameter_list=lenet.parameters())
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32)
epoch_num = 1
for epoch in range(epoch_num):
lenet.train()
for batch_id, data in enumerate(train_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc = fluid.layers.accuracy(out, label)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
_logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".
format(epoch, batch_id,
avg_loss.numpy(), acc.numpy()))
if batch_id == 500: # For shortening CI time
break
lenet.eval()
for batch_id, data in enumerate(test_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc_top1 = fluid.layers.accuracy(
input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=out, label=label, k=5)
if batch_id % 100 == 0:
_logger.info(
"Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
format(epoch, batch_id,
acc_top1.numpy(), acc_top5.numpy()))
# save weights
model_dict = lenet.state_dict()
fluid.save_dygraph(model_dict, "save_temp")
# test the correctness of `paddle.jit.save`
data = next(test_reader())
test_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
test_img = fluid.dygraph.to_variable(test_data)
lenet.eval()
before_save = lenet(test_img)
# save inference quantized model
paddle.jit.save(
layer=lenet,
path=TestImperativeAddQuantDequant.save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
dirname=TestImperativeAddQuantDequant.root_path,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX)
after_save, = exe.run(inference_program,
feed={feed_target_names[0]: test_data},
fetch_list=fetch_targets)
self.assertTrue(
np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.')
def test_qat_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'abs_max'
activation_quant_type = 'moving_average_abs_max'
param_init_map = {}
seed = 1000
lr = 0.001
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
imperative_qat = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quant_type,
quantizable_layer_type=[
'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh',
'Swish'
])
with fluid.dygraph.guard():
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
if batch_id > 500:
break
lenet.eval()
paddle.jit.save(
layer=lenet,
path=TestImperativeAddQuantDequant.dynamic_save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
# static graph train
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param.name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
add_quant_dequant_pass = AddQuantDequantPass(
scope=scope,
place=place,
quantizable_op_type=[
'relu', 'leaky_relu', 'relu6', 'tanh', 'swish'
])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
add_quant_dequant_pass.apply(main_graph)
add_quant_dequant_pass.apply(infer_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
save_program = infer_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model("./static_mnist", [infer_img.name],
[infer_pre], exe, save_program)
rtol = 1e-08
atol = 1e-10
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
if __name__ == '__main__':
unittest.main()
...@@ -19,18 +19,13 @@ import numpy as np
import random
import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.dygraph.container import Sequential
from paddle.nn import Linear, Conv2D, Softmax
from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from test_imperative_qat import TestImperativeQat
paddle.enable_static()
...@@ -42,388 +37,14 @@ _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class TestImperativeQatChannelWise(TestImperativeQat):
def set_vars(self):
self.weight_quantize_type = 'channel_wise_abs_max'
self.activation_quantize_type = 'moving_average_abs_max'
print('weight_quantize_type', self.weight_quantize_type)
def StaticLenet(data, num_classes=10):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
pool1 = fluid.layers.pool2d(
conv1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
pool2 = fluid.layers.pool2d(
conv2, pool_size=2, pool_type='max', pool_stride=2)
fc1 = fluid.layers.fc(input=pool2,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
fc2 = fluid.layers.fc(input=fc1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
fc3 = fluid.layers.fc(input=fc2,
size=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
fc3 = fluid.layers.softmax(fc3, use_cudnn=True)
return fc3
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2))
self.fc = Sequential(
Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr),
Softmax())
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestImperativeQatChannelWise(unittest.TestCase):
"""
QAT = quantization-aware training
"""
def test_qat_save(self):
imperative_qat = ImperativeQuantAware(
weight_quantize_type='channel_wise_abs_max',
activation_quantize_type='moving_average_abs_max')
with fluid.dygraph.guard():
lenet = ImperativeLenet()
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=0.001, parameter_list=lenet.parameters())
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32)
epoch_num = 1
for epoch in range(epoch_num):
lenet.train()
for batch_id, data in enumerate(train_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc = fluid.layers.accuracy(out, label)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
_logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".
format(epoch, batch_id,
avg_loss.numpy(), acc.numpy()))
lenet.eval()
for batch_id, data in enumerate(test_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc_top1 = fluid.layers.accuracy(
input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=out, label=label, k=5)
if batch_id % 100 == 0:
_logger.info(
"Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
format(epoch, batch_id,
acc_top1.numpy(), acc_top5.numpy()))
# save weights
model_dict = lenet.state_dict()
fluid.save_dygraph(model_dict, "save_temp")
# test the correctness of `paddle.jit.save`
data = next(test_reader())
test_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
test_img = fluid.dygraph.to_variable(test_data)
lenet.eval()
before_save = lenet(test_img)
# save inference quantized model
path = "./qat_infer_model/mnist"
save_dir = "./qat_infer_model"
paddle.jit.save(
layer=lenet,
path=path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(
dirname=save_dir,
executor=exe,
model_filename="mnist" + INFER_MODEL_SUFFIX,
params_filename="mnist" + INFER_PARAMS_SUFFIX)
after_save, = exe.run(inference_program,
feed={feed_target_names[0]: test_data},
fetch_list=fetch_targets)
self.assertTrue(
np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.')
def test_qat_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
weight_quantize_type = 'channel_wise_abs_max'
activation_quant_type = 'moving_average_abs_max'
param_init_map = {}
seed = 1000
lr = 0.001
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
imperative_qat = ImperativeQuantAware(
weight_quantize_type=weight_quantize_type,
activation_quantize_type=activation_quant_type)
with fluid.dygraph.guard():
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
paddle.jit.save(
layer=lenet,
path="./dynamic_mnist/model",
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
# static graph train
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param.name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = QuantizationTransformPass(
scope=scope,
place=place,
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quantize_type,
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul'])
transform_pass.apply(main_graph)
transform_pass.apply(infer_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
save_program = infer_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model("./static_mnist", [infer_img.name],
[infer_pre], exe, save_program)
rtol = 1e-05
atol = 1e-08
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
def test_qat(self):
self.run_qat_save()
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative qat.')
if __name__ == '__main__':
......
...@@ -31,6 +31,8 @@ from paddle.nn import Linear, Conv2D, Softmax, BatchNorm
from paddle.fluid.dygraph.nn import Pool2D
from paddle.fluid.log_helper import get_logger
from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenetWithSkipQuant
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
...@@ -39,144 +41,33 @@ _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.conv2d_0 = Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
weight_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
self.conv2d_0.skip_quant = True
self.batch_norm_0 = BatchNorm(6)
self.relu_0 = ReLU()
self.pool2d_0 = Pool2D(pool_size=2, pool_type='max', pool_stride=2)
self.conv2d_1 = Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
weight_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
self.conv2d_1.skip_quant = False
self.batch_norm_1 = BatchNorm(16)
self.relu6_0 = ReLU6()
self.pool2d_1 = Pool2D(pool_size=2, pool_type='max', pool_stride=2)
self.linear_0 = Linear(
in_features=400,
out_features=120,
weight_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
self.linear_0.skip_quant = True
self.leaky_relu_0 = LeakyReLU()
self.linear_1 = Linear(
in_features=120,
out_features=84,
weight_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
self.linear_1.skip_quant = False
self.sigmoid_0 = Sigmoid()
self.linear_2 = Linear(
in_features=84,
out_features=num_classes,
weight_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
self.linear_2.skip_quant = False
self.softmax_0 = Softmax()
def forward(self, inputs):
x = self.conv2d_0(inputs)
x = self.batch_norm_0(x)
x = self.relu_0(x)
x = self.pool2d_0(x)
x = self.conv2d_1(x)
x = self.batch_norm_1(x)
x = self.relu6_0(x)
x = self.pool2d_1(x)
x = fluid.layers.flatten(x, 1)
x = self.linear_0(x)
x = self.leaky_relu_0(x)
x = self.linear_1(x)
x = self.sigmoid_0(x)
x = self.linear_2(x)
x = self.softmax_0(x)
return x
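# In the model above, layers whose skip_quant flag is set to True (conv2d_0 and
# linear_0) are meant to be left un-quantized; the refactored test below reuses
# the equivalent ImperativeLenetWithSkipQuant from imperative_test_utils.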
class TestImperativeOutSclae(unittest.TestCase):
def test_out_scale_acc(self):
seed = 1000
lr = 0.1
imperative_out_scale = ImperativeQuantAware()
qat = ImperativeQuantAware()
np.random.seed(seed)
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=512, drop_last=True)
lenet = ImperativeLenet()
lenet = ImperativeLenetWithSkipQuant()
lenet = fix_model_dict(lenet)
qat.quantize(lenet)
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01,
size=np.product(p_shape)).reshape(p_shape).astype('float32')
fixed_state[name] = value
lenet.set_dict(fixed_state)
imperative_out_scale.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
loss_list = train_lenet(lenet, reader, adam)
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
lenet.eval()
path = "./save_dynamic_quant_infer_model/lenet"
save_dir = "./save_dynamic_quant_infer_model"
imperative_out_scale.save_quantized_model(
qat.save_quantized_model(
layer=lenet,
path=path,
input_spec=[
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
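# Each test below checks that a paddle.nn.quant functional wrapper produces the
# same result as the corresponding tensor op (e.g. paddle.nn.quant.add()(x, y)
# vs. paddle.add(x, y)).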
class TestFunctionalLayers(unittest.TestCase):
"""
"""
def setUp(self):
paddle.disable_static()
np.random.seed(1)
shape = [3, 100, 120]
self.x = paddle.to_tensor(np.random.random(shape))
self.y = paddle.to_tensor(np.random.random(shape))
def check(self, x, y):
self.assertTrue(np.allclose(x.numpy(), y.numpy()))
def test_quant_add(self):
out_1 = paddle.add(self.x, self.y)
out_2 = paddle.nn.quant.add()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_subtract(self):
out_1 = paddle.subtract(self.x, self.y)
out_2 = paddle.nn.quant.subtract()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_multiply(self):
out_1 = paddle.multiply(self.x, self.y)
out_2 = paddle.nn.quant.multiply()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_divide(self):
out_1 = paddle.divide(self.x, self.y)
out_2 = paddle.nn.quant.divide()(self.x, self.y)
self.check(out_1, out_2)
def test_quant_reshape(self):
reshape = [120, 300]
out_1 = paddle.reshape(self.x, reshape)
out_2 = paddle.nn.quant.reshape()(self.x.clone(), reshape)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
def test_quant_transpose(self):
perm = [1, 2, 0]
out_1 = paddle.transpose(self.x, perm)
out_2 = paddle.nn.quant.transpose()(self.x.clone(), perm)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
def test_quant_concat(self):
out_1 = paddle.concat([self.x, self.y], axis=0)
out_2 = paddle.nn.quant.concat()([self.x, self.y], 0)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
def test_quant_flatten(self):
start_axis = 1
end_axis = 2
out_1 = paddle.flatten(self.x, start_axis, end_axis)
out_2 = paddle.nn.quant.flatten()(self.x.clone(), start_axis, end_axis)
self.check(out_1, out_2)
self.assertTrue(out_1.shape == out_2.shape)
if __name__ == '__main__':
unittest.main()
...@@ -138,6 +138,7 @@ from ..fluid.dygraph.container import Sequential # noqa: F401
from . import utils # noqa: F401
from . import functional # noqa: F401
from . import initializer # noqa: F401
from . import quant # noqa: F401
#TODO: remove 'diag_embed', 'remove_weight_norm', 'weight_norm' months later.
import paddle.utils.deprecated as deprecated
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .functional_layers import FloatFunctionalLayer # noqa: F401
from .functional_layers import add # noqa: F401
from .functional_layers import subtract # noqa: F401
from .functional_layers import multiply # noqa: F401
from .functional_layers import divide # noqa: F401
from .functional_layers import reshape # noqa: F401
from .functional_layers import transpose # noqa: F401
from .functional_layers import concat # noqa: F401
from .functional_layers import flatten # noqa: F401
__all__ = []
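# These wrappers are exposed as paddle.nn.quant.*; for example,
# paddle.nn.quant.add()(x, y) returns the same result as paddle.add(x, y)
# (see the TestFunctionalLayers unit test above).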
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ...fluid.dygraph import layers
from ...tensor import math, manipulation
__all__ = []
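# Each class below wraps a tensor op (paddle.add, paddle.reshape, ...) as a
# Layer subclass, so that a functional call site can be declared as a sub-layer
# of a dygraph model and be handled by the quantization tooling like any other
# quantizable layer.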
class FloatFunctionalLayer(layers.Layer):
def __init__(self):
super(FloatFunctionalLayer, self).__init__()
class add(FloatFunctionalLayer):
def __init__(self):
super(add, self).__init__()
def forward(self, x, y, name=None):
return math.add(x, y, name)
class subtract(FloatFunctionalLayer):
def __init__(self):
super(subtract, self).__init__()
def forward(self, x, y, name=None):
return math.subtract(x, y, name)
class multiply(FloatFunctionalLayer):
def __init__(self):
super(multiply, self).__init__()
def forward(self, x, y, name=None):
return math.multiply(x, y, name)
class divide(FloatFunctionalLayer):
def __init__(self):
super(divide, self).__init__()
def forward(self, x, y, name=None):
return math.divide(x, y, name)
class reshape(FloatFunctionalLayer):
def __init__(self):
super(reshape, self).__init__()
def forward(self, x, shape, name=None):
return manipulation.reshape(x, shape, name)
class transpose(FloatFunctionalLayer):
def __init__(self):
super(transpose, self).__init__()
def forward(self, x, perm, name=None):
return manipulation.transpose(x, perm, name)
class concat(FloatFunctionalLayer):
def __init__(self):
super(concat, self).__init__()
def forward(self, x, axis=0, name=None):
return manipulation.concat(x, axis, name)
class flatten(FloatFunctionalLayer):
def __init__(self):
super(flatten, self).__init__()
def forward(self, x, start_axis=0, stop_axis=-1, name=None):
return manipulation.flatten(x, start_axis, stop_axis, name)
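# Illustrative usage sketch (not part of this commit; the class and member
# names below are hypothetical): holding a wrapper as a sub-layer makes the
# elementwise add show up in named_sublayers(), just like Conv2D or Linear.
import paddle

class ResidualBlock(paddle.nn.Layer):
    def __init__(self):
        super(ResidualBlock, self).__init__()
        self.conv = paddle.nn.Conv2D(16, 16, 3, padding=1)
        self.add = paddle.nn.quant.add()  # Layer wrapper around paddle.add

    def forward(self, x):
        # the functional add is now a sub-layer of the model
        return self.add(self.conv(x), x)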
...@@ -213,6 +213,7 @@ packages=['paddle',
'paddle.nn',
'paddle.nn.functional',
'paddle.nn.layer',
'paddle.nn.quant',
'paddle.nn.initializer',
'paddle.nn.utils',
'paddle.metric',
......