Unverified commit 72973d5a, authored by Z zhouzj, committed by GitHub

[clean fluid api] Move fluid/contrib/slim and remove fluid api. (#48717)

Parent a186e60d
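At a glance, the commit relocates the quantization tooling: the static-graph passes move from paddle.fluid.contrib.slim.quantization to paddle.static.quantization, and the imperative (dygraph) API is re-exported from paddle.quantization. A minimal before/after sketch of the import change, as reflected in the diffs below:

    # before this commit
    from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
    from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware

    # after this commit
    from paddle.static.quantization import PostTrainingQuantization
    from paddle.quantization import ImperativeQuantAware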
@@ -119,7 +119,7 @@ if(WITH_TESTING)
   add_subdirectory(paddle/tests)
   add_subdirectory(paddle/fluid/tests)
   add_subdirectory(paddle/fluid/contrib/tests)
-  add_subdirectory(paddle/fluid/contrib/slim/tests)
+  add_subdirectory(paddle/static/quantization/tests)
 endif()
 if(NOT WITH_SETUP_INSTALL)
...
@@ -1617,9 +1617,7 @@ class Engine:
             fetch_vars = self._fetch_vars["predict"]['outputs']
             dist_main_prog = self._dist_main_progs["predict"][self._cur_rank]
             if self._strategy.qat.enable and self._strategy.qat.onnx_format:
-                from paddle.fluid.contrib.slim.quantization import (
-                    QuantWeightPass,
-                )
+                from paddle.static.quantization import QuantWeightPass
                 self._logger.info("export quantized model.")
                 self._logger.info(
...
@@ -18,14 +18,14 @@ import numpy as np
 import paddle
 from paddle.fluid import core, framework
-from paddle.fluid.contrib.slim.quantization import (
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.static.quantization import (
     AddQuantDequantForInferencePass,
     AddQuantDequantPassV2,
     OutScaleForTrainingPass,
     QuantizationTransformPassV2,
     utils,
 )
-from paddle.fluid.dygraph.parallel import ParallelEnv
 from ..auto_parallel.converter import Converter
 from ..auto_parallel.dist_attribute import (
...
@@ -18,9 +18,6 @@ from . import memory_usage_calc
 from .memory_usage_calc import *
 from . import op_frequence
 from .op_frequence import *
-from . import quantize
-from .quantize import *
-from . import slim
 from . import extend_optimizer
 from .extend_optimizer import *
 from . import model_stat
@@ -36,7 +33,6 @@ __all__ = []
 __all__ += memory_usage_calc.__all__
 __all__ += op_frequence.__all__
-__all__ += quantize.__all__
 __all__ += extend_optimizer.__all__
 __all__ += ['mixed_precision']
 __all__ += layers.__all__
...
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import quantization_pass
from .quantization_pass import *
from . import quant_int8_mkldnn_pass
from .quant_int8_mkldnn_pass import *
from . import quant2_int8_mkldnn_pass
from .quant2_int8_mkldnn_pass import *
from . import post_training_quantization
from .post_training_quantization import *
from . import imperative
from .imperative import *
__all__ = []
__all__ += quantization_pass.__all__
__all__ += quant_int8_mkldnn_pass.__all__
__all__ += quant2_int8_mkldnn_pass.__all__
__all__ += post_training_quantization.__all__
__all__ += imperative.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import numpy as np
from .... import core
from ....framework import Program, Operator, Variable, program_guard
from ....executor import global_scope
from .... import unique_name
from ....layer_helper import LayerHelper
from ....param_attr import ParamAttr
from ....initializer import Constant
from ....log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
def find_next_ops(block, var_name):
"""
Find all the ops that consume the given variable as an input.
"""
res_ops = []
for op in block.ops:
if var_name in op.input_arg_names:
res_ops.append(op)
return res_ops
def load_variable_data(scope, var_name):
'''
Load variable value from scope
'''
var_node = scope.find_var(var_name)
assert var_node is not None, "Cannot find " + var_name + " in scope."
return np.array(var_node.get_tensor())
class QuantizeTranspilerV2:
def __init__(
self,
weight_bits=8,
activation_bits=8,
weight_quantize_type='abs_max',
activation_quantize_type='moving_average_abs_max',
quantizable_op_type=[
'conv2d',
'depthwise_conv2d',
'mul',
],
skip_pattern=['skip_quant'],
):
"""
Apply fake quant for the quantized ops.
Args:
weight_bits(int): the bit width of the quantized weight.
activation_bits(int): the bit width of the quantized activation.
weight_quantize_type(str): the quantization type for weights.
Only 'abs_max' and 'channel_wise_abs_max' are supported.
activation_quantize_type(str): the quantization type for activations.
Only 'abs_max' and 'moving_average_abs_max' are supported.
quantizable_op_type(list[str]): the op types to be quantized.
skip_pattern(str|list): The user-defined quantization skip pattern, which
will be presented in the name scope of an op. When the skip pattern is
detected in an op's name scope, the corresponding op will not be quantized.
"""
self._weight_bits = weight_bits
self._activation_bits = activation_bits
assert activation_quantize_type in [
"abs_max",
"moving_average_abs_max",
], (
"activation_quantize_type should be abs_max "
"or moving_average_abs_max for now."
)
assert weight_quantize_type in [
"abs_max",
"channel_wise_abs_max",
], "weight_quantize_type should be abs_max or channel_wise_abs_max."
self._activation_quantize_type = activation_quantize_type
self._weight_quantize_type = weight_quantize_type
for op_type in quantizable_op_type:
assert op_type in [
'conv2d',
'depthwise_conv2d',
'mul',
], "Quantize op should be ['conv2d', 'depthwise_conv2d', 'mul']"
self._quantizable_ops = quantizable_op_type
self._quantizable_grad_ops = [
'%s_grad' % (op) for op in self._quantizable_ops
]
self._skip_pattern = skip_pattern
self._helper = LayerHelper(self.__class__.__name__)
self._moving_rate = 0.9
self._out_ch_axis1_ops = ['conv2d_transpose', 'mul', 'matmul']
def apply(self, program, startup_program, is_test=False):
"""
Apply quantization to fluid Program.
Args:
program(Program): the train or test program to be quantized.
startup_program(Program): the corresponding startup_program.
is_test(bool): Whether the program is used for testing.
Returns:
None
"""
assert isinstance(
program, Program
), "program must be the instance of Program"
assert isinstance(
startup_program, Program
), "startup_program must be the instance of Program"
var_rename_map = [
collections.OrderedDict() for _ in range(len(program.blocks))
]
with program_guard(program, startup_program):
for block in program.blocks:
ops = list(block.ops)
for op in ops:
if op.type in self._quantizable_ops and (
not self._is_skip_quant(op)
):
self._transform_forward(
block, op, var_rename_map, is_test
)
for block in program.blocks:
ops = list(block.ops)
for op in ops:
if op.type in self._quantizable_grad_ops and (
not self._is_skip_quant(op)
):
self._transform_backward(block, op, var_rename_map)
def convert(self, test_program, scope=None):
"""
Convert the test program.
Get the out scale from the moving_average_abs_max_scale op and save the
out scale into the quantized op.
Args:
test_program(Program): the test program to be converted.
scope(fluid.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by global_scope().
"""
scope = global_scope() if scope is None else scope
for block in test_program.blocks:
for op in block.ops:
if (
op.has_attr("quantization_type")
and op.attr("quantization_type") == "qat_with_weight"
):
# quant op -> var1 -> fake op -> var2
assert len(op.output_arg_names) == 1
var1_name = op.output_arg_names[0]
fake_ops = find_next_ops(block, var1_name)
assert len(fake_ops) == 1
fake_op = fake_ops[0]
assert fake_op.type == "moving_average_abs_max_scale"
out_scale_name = fake_op.output("OutScale")
out_threshold = load_variable_data(scope, out_scale_name[0])
op._set_attr("out_threshold", float(out_threshold))
var2_name = fake_op.output("Out")[0]
op._rename_output(var1_name, var2_name)
fake_op._rename_output(var2_name, var1_name)
def _transform_forward(self, block, op, var_rename_map, is_test):
"""
Insert fake quant op before the target ops.
"""
op._set_attr("quantization_type", "qat_with_weight")
# insert fake quant op before the quantized op
for in_name in op.input_arg_names:
block_id = block.idx
idx = block.ops.index(op)
if in_name in var_rename_map[block_id]:
new_in_name = var_rename_map[block_id][in_name]
else:
in_var = block.var(in_name)
target_dtype = [
core.VarDesc.VarType.FP32,
core.VarDesc.VarType.FP16,
]
if in_var.dtype not in target_dtype:
continue
quant_bits = (
self._weight_bits
if in_var.persistable
else self._activation_bits
)
quant_type = (
self._weight_quantize_type
if in_var.persistable
else self._activation_quantize_type
)
if quant_type == "abs_max":
new_var = self._insert_abs_max_fq_op(
block, idx, in_var, quant_bits
)
elif quant_type == "moving_average_abs_max":
new_var = self._insert_ma_abs_max_fq_op(
block, idx, in_var, quant_bits, is_test
)
elif quant_type == "channel_wise_abs_max":
ch_axis = 1 if op.type in self._out_ch_axis1_ops else 0
new_var = self._insert_pc_abs_max_fq_op(
block, idx, in_var, quant_bits, ch_axis
)
else:
_logger.error(
"Don't support the quant_type: %s" % quant_type
)
continue
new_in_name = new_var.name
var_rename_map[block_id][in_name] = new_in_name
op._rename_input(in_name, new_in_name)
# insert out scale op followed the quantized op
for out_name in op.output_arg_names:
next_ops = find_next_ops(block, out_name)
idx = block.ops.index(op)
out_var = block.var(out_name)
new_out_var = self._insert_ma_abs_max_scale_op(
block, idx + 1, out_var, is_test, True
)
for next_op in next_ops:
if "_grad" not in next_op.type:
next_op._rename_input(out_name, new_out_var.name)
def _is_skip_quant(self, op):
"""
Analyse whether the op should skip quantization or not.
"""
user_skipped = False
if isinstance(self._skip_pattern, list):
user_skipped = op.has_attr("op_namescope") and any(
pattern in op.attr("op_namescope")
for pattern in self._skip_pattern
)
elif isinstance(self._skip_pattern, str):
user_skipped = (
op.has_attr("op_namescope")
and op.attr("op_namescope").find(self._skip_pattern) != -1
)
return user_skipped
def _transform_backward(self, block, op, var_rename_map):
"""
Update the backward pass of the target ops.
Note: for the grad ops, only rename the inputs; skip renaming the outputs.
"""
block_id = block.idx
no_dequanted_input_vars = True
for name in op.input_arg_names:
if name in var_rename_map[block_id]:
new_var_name = var_rename_map[block_id][name]
op._rename_input(name, new_var_name)
no_dequanted_input_vars = False
if no_dequanted_input_vars:
raise ValueError(
"There are no dequantized inputs for op %s." % (op.type)
)
def _insert_abs_max_fq_op(self, block, idx, in_var, quant_bits):
"""
Insert abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
inputs = {'X': in_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
attrs = {'bit_length': quant_bits}
block._insert_op(
idx,
type='fake_quantize_dequantize_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_ma_abs_max_fq_op(self, block, idx, in_var, quant_bits, is_test):
"""
Insert moving average abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
if not is_test:
state_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.state".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
state_var.stop_gradient = True
accum_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.accum".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
accum_var.stop_gradient = True
attrs = {
'moving_rate': self._moving_rate,
'bit_length': quant_bits,
'is_test': is_test,
}
inputs = {'X': in_var, 'InScale': scale_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
if not is_test:
inputs['InState'] = state_var
inputs['InAccum'] = accum_var
outputs['OutState'] = state_var
outputs['OutAccum'] = accum_var
block._insert_op(
idx,
type='fake_quantize_dequantize_moving_average_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_pc_abs_max_fq_op(self, block, idx, in_var, quant_bits, ch_axis):
"""
Insert per channel abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[in_var.shape[ch_axis]],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
inputs = {'X': in_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
attrs = {'bit_length': quant_bits, 'quant_axis': ch_axis}
block._insert_op(
idx,
type='fake_channel_wise_quantize_dequantize_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_ma_abs_max_scale_op(
self, block, idx, in_var, is_test, has_out_var=False
):
"""
Insert moving average abs max scale op.
"""
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
attrs = {'moving_rate': self._moving_rate, 'is_test': is_test}
inputs = {'X': in_var}
outputs = {'OutScale': scale_var}
if not is_test:
state_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.state".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
state_var.stop_gradient = True
accum_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.accum".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
accum_var.stop_gradient = True
inputs['InState'] = state_var
inputs['InAccum'] = accum_var
outputs['OutState'] = state_var
outputs['OutAccum'] = accum_var
if has_out_var:
out_var = block.create_var(
type=in_var.type,
name="{}.tmp".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
outputs['Out'] = out_var
block._insert_op(
idx,
type='moving_average_abs_max_scale',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
if has_out_var:
return out_var
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization.quantize_transpiler_v2 import (
QuantizeTranspilerV2,
)
from paddle.fluid import core
paddle.enable_static()
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
def conv_net(img, label):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
pool_type='max',
act="relu",
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
pool_type='avg',
act="relu",
)
with fluid.name_scope("skip_quant"):
hidden = fluid.layers.fc(input=conv_pool_1, size=100, act='relu')
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss)
return avg_loss
class TestQuantizeProgramPass(unittest.TestCase):
def quantize_program(
self,
use_cuda,
seed,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=False,
):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32'
)
label = fluid.layers.data(
name='label', shape=[1], dtype='int64'
)
loss = conv_net(img, label)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.0001)
opt.minimize(loss)
return [img, label], loss
random.seed(0)
np.random.seed(0)
# 1 Define program
train_program = fluid.Program()
startup_program = fluid.Program()
test_program = fluid.Program()
feeds, loss = build_program(train_program, startup_program, False)
build_program(test_program, startup_program, True)
test_program = test_program.clone(for_test=True)
if not for_ci:
train_graph = IrGraph(
core.Graph(train_program.desc), for_test=False
)
train_graph.draw('.', 'train_program_1')
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
test_graph.draw('.', 'test_program_1')
# 2 Apply quantization
qt = QuantizeTranspilerV2(
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quant_type,
)
qt.apply(train_program, startup_program, is_test=False)
qt.apply(test_program, startup_program, is_test=True)
# 3 Train
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
with fluid.scope_guard(scope):
exe.run(startup_program)
if not for_ci:
train_graph = IrGraph(
core.Graph(train_program.desc), for_test=False
)
train_graph.draw('.', 'train_program_2')
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
test_graph.draw('.', 'test_program_2')
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(train_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy
)
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size
)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.scope_guard(scope):
for idx in range(iters):
data = next(train_reader())
loss_v = exe.run(
binary, feed=feeder.feed(data), fetch_list=[loss]
)
if not for_ci and idx % 20 == 0:
print('{}: {}'.format('loss', np.mean(loss_v)))
print('{}: {}'.format('loss', np.mean(loss_v)))
# 4 Convert
qt.convert(test_program, scope)
if not for_ci:
with fluid.scope_guard(scope):
fluid.io.save_inference_model(
'./infer_model',
['image', 'label'],
[loss],
exe,
test_program,
clip_extra=True,
)
def test_gpu_1(self):
if fluid.core.is_compiled_with_cuda():
self.quantize_program(
use_cuda=True,
seed=1,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True,
)
def test_gpu_2(self):
if fluid.core.is_compiled_with_cuda():
self.quantize_program(
use_cuda=True,
seed=1,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True,
)
def test_cpu_1(self):
self.quantize_program(
use_cuda=False,
seed=2,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True,
)
def test_cpu_2(self):
self.quantize_program(
use_cuda=False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True,
)
if __name__ == '__main__':
unittest.main()
@@ -25,5 +25,4 @@ set_tests_properties(test_multi_precision_fp16_train PROPERTIES TIMEOUT 120)
 if(APPLE)
   set_tests_properties(test_model_cast_to_bf16 PROPERTIES TIMEOUT 300)
-  set_tests_properties(test_quantize_transpiler PROPERTIES TIMEOUT 300)
 endif()
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import numpy as np
import unittest
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.quantize.quantize_transpiler import _original_var_name
from paddle.fluid.contrib.quantize.quantize_transpiler import QuantizeTranspiler
import paddle
paddle.enable_static()
def linear_fc(num):
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in range(num):
hidden = fluid.layers.fc(hidden, size=128, act='relu')
loss = paddle.nn.functional.cross_entropy(
input=hidden, label=label, reduction='none', use_softmax=False
)
loss = paddle.mean(loss)
return loss
def residual_block(num):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr,
)
return paddle.static.nn.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in range(num):
conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short))
fc = fluid.layers.fc(input=hidden, size=10)
loss = paddle.nn.functional.cross_entropy(
input=fc, label=label, reduction='none', use_softmax=False
)
loss = paddle.mean(loss)
return loss
def conv_net(img, label):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu",
)
conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu",
)
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss)
return avg_loss
class TestQuantizeTranspiler(unittest.TestCase):
def setUp(self):
# since quant_op and dequant_op are not ready, use cos and sin for the test
self.weight_quant_op_type = 'fake_quantize_abs_max'
self.dequant_op_type = 'fake_dequantize_max_abs'
self.quantizable_op_and_inputs = {
'conv2d': ['Input', 'Filter'],
'depthwise_conv2d': ['Input', 'Filter'],
'mul': ['X', 'Y'],
}
self.quantizable_op_grad_and_inputs = {
'conv2d_grad': ['Input', 'Filter'],
'depthwise_conv2d_grad': ['Input', 'Filter'],
'mul_grad': ['X', 'Y'],
}
def check_program(self, program):
quantized_ops = {}
persistable_vars = [
v.name
for v in filter(lambda var: var.persistable, program.list_vars())
]
for block in program.blocks:
for idx, op in enumerate(block.ops):
# check forward
if op.type in self.quantizable_op_and_inputs:
for i, arg_name in enumerate(op.input_arg_names):
quant_op_type = (
self.weight_quant_op_type
if _original_var_name(arg_name) in persistable_vars
else self.act_quant_op_type
)
self.assertTrue(
arg_name.endswith('.quantized.dequantized')
)
if arg_name not in quantized_ops:
self.assertEqual(
block.ops[idx - 2 * i - 1].type,
self.dequant_op_type,
)
self.assertEqual(
block.ops[idx - 2 * i - 2].type, quant_op_type
)
quantized_ops[arg_name] = block.ops[idx - 2 * i - 2]
else:
op_idx = block.ops.index(quantized_ops[arg_name])
self.assertLess(op_idx, idx)
# check backward
if op.type in self.quantizable_op_grad_and_inputs:
for pname in self.quantizable_op_grad_and_inputs[op.type]:
arg_name = op.input(pname)[0]
self.assertTrue(
arg_name.endswith('.quantized.dequantized')
)
self.assertTrue(arg_name in quantized_ops)
def linear_fc_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = linear_fc(3)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
t = QuantizeTranspiler(activation_quantize_type=quant_type)
t.training_transpile(main)
self.check_program(main)
def test_linear_fc_quant_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.linear_fc_quant('abs_max')
def test_linear_fc_quant_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.linear_fc_quant('range_abs_max')
def residual_block_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = residual_block(2)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
t = QuantizeTranspiler(activation_quantize_type=quant_type)
t.training_transpile(main)
self.check_program(main)
def test_residual_block_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.residual_block_quant('abs_max')
def test_residual_block_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.residual_block_quant('range_abs_max')
def freeze_program(self, use_cuda, seed):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32'
)
label = fluid.layers.data(
name='label', shape=[1], dtype='int64'
)
loss = conv_net(img, label)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
return [img, label], loss
main = fluid.Program()
startup = fluid.Program()
test_program = fluid.Program()
import random
random.seed(0)
np.random.seed(0)
feeds, loss = build_program(main, startup, False)
build_program(test_program, startup, True)
test_program = test_program.clone(for_test=True)
quant_type = 'range_abs_max' # 'range_abs_max' or 'abs_max'
quant_transpiler = QuantizeTranspiler(
activation_quantize_type=quant_type
)
quant_transpiler.training_transpile(main, startup)
quant_transpiler.training_transpile(test_program, startup)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
iters = 5
batch_size = 8
class_num = 10
exe.run(startup)
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
batch_size=batch_size,
)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=batch_size
)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.program_guard(main):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(
program=main, feed=feeder.feed(data), fetch_list=[loss]
)
with fluid.program_guard(test_program):
test_data = next(test_reader())
w_var = fluid.framework._get_var(
'conv2d_1.w_0.quantized', test_program
)
# Testing during training
test_loss1, w_quant = exe.run(
program=test_program,
feed=feeder.feed(test_data),
fetch_list=[loss, w_var],
)
# Freeze program for inference, but the weight of fc/conv is still float type.
quant_transpiler.freeze_program(test_program, place)
(test_loss2,) = exe.run(
program=test_program,
feed=feeder.feed(test_data),
fetch_list=[loss],
)
self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
w_freeze = np.array(
fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()
)
# fail: -432.0 != -433.0, this is due to the calculation precision
# self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
# Convert parameter to 8-bit.
quant_transpiler.convert_to_int8(test_program, place)
# Save the 8-bit parameter and model file.
fluid.io.save_inference_model(
'model_8bit',
['image', 'label'],
[loss],
exe,
test_program,
clip_extra=True,
)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[infer, feed, fetch] = fluid.io.load_inference_model(
'model_8bit', exe
)
# Check the loaded 8-bit weight.
w_8bit = np.array(
fluid.global_scope().find_var('conv2d_1.w_0.int8').get_tensor()
)
self.assertEqual(w_8bit.dtype, np.int8)
self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
def not_test_freeze_program_cuda(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.freeze_program(True, seed=1)
def not_test_freeze_program_cpu(self):
with fluid.unique_name.guard():
self.freeze_program(False, seed=2)
if __name__ == '__main__':
unittest.main()
@@ -23,7 +23,7 @@ import paddle.distributed.fleet as fleet
 import paddle.fluid as fluid
 import paddle.nn as nn
 from paddle.distributed.utils.launch_utils import find_free_ports, get_cluster
-from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
+from paddle.quantization import ImperativeQuantAware
 def set_random_seed(seed, dp_id, rank_id):
...
@@ -20,10 +20,6 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
-from paddle.fluid.contrib.slim.quantization import (
-    QuantizationFreezePass,
-    QuantizationTransformPass,
-)
 from paddle.fluid.executor import global_scope
 from paddle.fluid.framework import (
     IrGraph,
@@ -32,6 +28,10 @@ from paddle.fluid.framework import (
     convert_np_dtype_to_dtype_,
 )
 from paddle.fluid.initializer import NumpyArrayInitializer
+from paddle.static.quantization import (
+    QuantizationFreezePass,
+    QuantizationTransformPass,
+)
 class TensorConfig:
...
@@ -21,16 +21,16 @@ import numpy as np
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid import Program, Variable, core
-from paddle.fluid.contrib.slim.quantization import (
+from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
+from paddle.fluid.framework import IrGraph
+from paddle.fluid.io import append_fetch_ops, prepend_feed_ops
+from paddle.static.quantization import (
     AddQuantDequantPass,
     OutScaleForInferencePass,
     OutScaleForTrainingPass,
     QuantizationFreezePass,
     QuantizationTransformPass,
 )
-from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
-from paddle.fluid.framework import IrGraph
-from paddle.fluid.io import append_fetch_ops, prepend_feed_ops
 class QuantDequantTest(unittest.TestCase):
...
@@ -18,9 +18,9 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 from paddle.fluid import core
-from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
 from paddle.fluid.framework import IrGraph, Program, program_guard
 from paddle.fluid.tests.unittests.op_test import OpTestTool
+from paddle.static.quantization import QuantizationTransformPass
 paddle.enable_static()
...
@@ -24,7 +24,7 @@ from PIL import Image
 import paddle
 import paddle.fluid as fluid
 from paddle.dataset.common import download
-from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
+from paddle.static.quantization import PostTrainingQuantization
 paddle.enable_static()
...
@@ -12,40 +12,41 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ..fluid.contrib.slim.quantization.imperative.ptq_config import (
+from .imperative.ptq_config import (
     PTQConfig,
     default_ptq_config,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     BaseQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     AbsmaxQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     PerChannelAbsmaxQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     KLQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     HistQuantizer,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     SUPPORT_ACT_QUANTIZERS,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
+from .imperative.ptq_quantizer import (
     SUPPORT_WT_QUANTIZERS,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq_registry import (
+from .imperative.ptq_registry import (
     PTQRegistry,
 )
-from ..fluid.contrib.slim.quantization.imperative.ptq import ImperativePTQ
-from ..fluid.contrib.slim.quantization.imperative.qat import (
+from .imperative.ptq import (
+    ImperativePTQ,
+)
+from .imperative.qat import (
     ImperativeQuantAware,
 )
 from .config import QuantConfig
 from .base_quanter import BaseQuanter
 from .factory import quanter
...
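With the re-exports above, user code can import the PTQ helpers directly from paddle.quantization. A minimal sketch (the quantizer pairing below is illustrative and mirrors default_ptq_config):

    from paddle.quantization import (
        ImperativePTQ,
        PTQConfig,
        KLQuantizer,
        PerChannelAbsmaxQuantizer,
    )

    # KL quantizer for activations, per-channel abs-max quantizer for weights
    ptq = ImperativePTQ(PTQConfig(KLQuantizer(), PerChannelAbsmaxQuantizer()))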
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,23 +13,24 @@
 # limitations under the License.
 from . import qat
-from .qat import *
+from .qat import ImperativeQuantAware
 from . import ptq
-from .ptq import *
+from .ptq import ImperativePTQ
 from . import ptq_config
-from .ptq_config import *
+from .ptq_config import PTQConfig, default_ptq_config
 from . import ptq_quantizer
-from .ptq_quantizer import *
+from .ptq_quantizer import (
+    BaseQuantizer,
+    AbsmaxQuantizer,
+    PerChannelAbsmaxQuantizer,
+    KLQuantizer,
+    HistQuantizer,
+    SUPPORT_ACT_QUANTIZERS,
+    SUPPORT_WT_QUANTIZERS,
+)
 from . import ptq_registry
-from .ptq_registry import *
-__all__ = []
-__all__ += qat.__all__
-__all__ += ptq.__all__
-__all__ += ptq_config.__all__
-__all__ += ptq_quantizer.__all__
-__all__ += ptq_registry.__all__
+from .ptq_registry import PTQRegistry
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,8 +13,10 @@
 # limitations under the License.
 import copy
+import paddle
 import paddle.nn as nn
 from . import utils
@@ -66,7 +68,7 @@ def fuse_layers(model, layers_to_fuse, inplace=False):
     Return
         fused_model(paddle.nn.Layer): The fused model.
     '''
-    if inplace == False:
+    if inplace is False:
         model = copy.deepcopy(model)
     for layers in layers_to_fuse:
         _fuse_layers(model, layers)
...
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,24 +12,27 @@ ...@@ -12,24 +12,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
import copy import copy
import logging
import os import os
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from ...static.log_helper import get_logger
from ...static.quantization.utils import (
from . import fuse_utils _get_input_name_index,
from . import utils _get_op_input_var_names,
from . import ptq_hooks _get_op_output_var_names,
from . import ptq_config _get_output_name_index,
from . import ptq_quantizer )
from . import fuse_utils, ptq_config, ptq_hooks, ptq_quantizer, utils
from .ptq_registry import PTQRegistry from .ptq_registry import PTQRegistry
__all__ = ['ImperativePTQ'] INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -165,8 +168,8 @@ class ImperativePTQ: ...@@ -165,8 +168,8 @@ class ImperativePTQ:
infer_program, infer_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = paddle.fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
dirname=dirname, path_prefix=dirname,
executor=exe, executor=exe,
model_filename=model_filename, model_filename=model_filename,
params_filename=params_filename, params_filename=params_filename,
...@@ -178,14 +181,23 @@ class ImperativePTQ: ...@@ -178,14 +181,23 @@ class ImperativePTQ:
self._remove_scale_op(infer_program) self._remove_scale_op(infer_program)
# Save final program # Save final program
paddle.fluid.io.save_inference_model( model_name = None
dirname=dirname, if model_filename is None:
feeded_var_names=feed_target_names, model_name = "model"
target_vars=fetch_targets, elif model_filename.endswith(".pdmodel"):
model_name = model_filename.rsplit(".", 1)[0]
else:
model_name = model_filename
path_prefix = os.path.join(dirname, model_name)
feed_vars = [
infer_program.global_block().var(name) for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets,
executor=exe, executor=exe,
main_program=infer_program.clone(), program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename,
) )
if is_dynamic_mode: if is_dynamic_mode:
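For reference, the new save path above replaces the dirname/feeded_var_names/target_vars arguments with the path-prefix style of paddle.static.save_inference_model; a small worked sketch of the prefix derivation, using hypothetical names:

    import os

    dirname = "./quant_model"                        # hypothetical output directory
    model_filename = "model.pdmodel"                 # hypothetical model file name
    model_name = model_filename.rsplit(".", 1)[0]    # -> "model"
    path_prefix = os.path.join(dirname, model_name)  # -> "./quant_model/model"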
...@@ -302,7 +314,7 @@ class ImperativePTQ: ...@@ -302,7 +314,7 @@ class ImperativePTQ:
) and PTQRegistry.is_simulated_quant_layer(sub_layer): ) and PTQRegistry.is_simulated_quant_layer(sub_layer):
quant_config = sub_layer._quant_config quant_config = sub_layer._quant_config
assert quant_config.enable_in_act_quantizer == True assert quant_config.enable_in_act_quantizer is True
wt_quantizer = quant_config.wt_quantizer wt_quantizer = quant_config.wt_quantizer
in_act_quantizer = quant_config.in_act_quantizer in_act_quantizer = quant_config.in_act_quantizer
...@@ -376,7 +388,7 @@ class ImperativePTQ: ...@@ -376,7 +388,7 @@ class ImperativePTQ:
None None
""" """
for op in utils.program_all_ops(program): for op in utils.program_all_ops(program):
for in_var_name in utils._get_op_input_var_names(op): for in_var_name in _get_op_input_var_names(op):
previous_op = utils.find_previous_op(op.block, in_var_name) previous_op = utils.find_previous_op(op.block, in_var_name)
if previous_op is None: if previous_op is None:
continue continue
...@@ -388,20 +400,16 @@ class ImperativePTQ: ...@@ -388,20 +400,16 @@ class ImperativePTQ:
attr_name = previous_op.output('OutScale')[0] attr_name = previous_op.output('OutScale')[0]
in_threshold = utils.load_variable_data(scope, attr_name) in_threshold = utils.load_variable_data(scope, attr_name)
in_threshold = utils.fp_numpy_to_naive(in_threshold) in_threshold = utils.fp_numpy_to_naive(in_threshold)
argname, index = utils._get_input_name_index( argname, index = _get_input_name_index(op, in_var_name)
op, in_var_name
)
op._set_attr( op._set_attr(
argname + str(index) + "_threshold", in_threshold argname + str(index) + "_threshold", in_threshold
) )
op._set_attr("with_quant_attr", True) op._set_attr("with_quant_attr", True)
else: else:
for out_var_name in utils._get_op_output_var_names( for out_var_name in _get_op_output_var_names(previous_op):
previous_op
):
if out_var_name != in_var_name: if out_var_name != in_var_name:
continue continue
argname, index = utils._get_output_name_index( argname, index = _get_output_name_index(
previous_op, out_var_name previous_op, out_var_name
) )
attr_name = argname + str(index) + "_threshold" attr_name = argname + str(index) + "_threshold"
...@@ -409,9 +417,7 @@ class ImperativePTQ: ...@@ -409,9 +417,7 @@ class ImperativePTQ:
continue continue
threshold = previous_op.attr(attr_name) threshold = previous_op.attr(attr_name)
argname, index = utils._get_input_name_index( argname, index = _get_input_name_index(op, in_var_name)
op, in_var_name
)
attr_name = argname + str(index) + "_threshold" attr_name = argname + str(index) + "_threshold"
op._set_attr(attr_name, threshold) op._set_attr(attr_name, threshold)
op._set_attr("with_quant_attr", True) op._set_attr("with_quant_attr", True)
...@@ -453,10 +459,10 @@ class ImperativePTQ: ...@@ -453,10 +459,10 @@ class ImperativePTQ:
continue continue
next_op = next_ops[0] next_op = next_ops[0]
argname, index = utils._get_output_name_index(op, out_var_name) argname, index = _get_output_name_index(op, out_var_name)
old_attr_name = argname + str(index) + "_threshold" old_attr_name = argname + str(index) + "_threshold"
argname, index = utils._get_output_name_index( argname, index = _get_output_name_index(
next_op, next_op.output("Out")[0] next_op, next_op.output("Out")[0]
) )
new_attr_name = argname + str(index) + "_threshold" new_attr_name = argname + str(index) + "_threshold"
...@@ -478,7 +484,7 @@ class ImperativePTQ: ...@@ -478,7 +484,7 @@ class ImperativePTQ:
@staticmethod @staticmethod
def _is_skip_layer(layer): def _is_skip_layer(layer):
return hasattr(layer, "skip_quant") and layer.skip_quant == True return hasattr(layer, "skip_quant") and layer.skip_quant is True
@staticmethod @staticmethod
def _is_quant_layer(layer): def _is_quant_layer(layer):
......
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import abc
 import copy
-import paddle
-from .ptq_quantizer import *
-__all__ = ['PTQConfig', 'default_ptq_config']
+from .ptq_quantizer import (
+    SUPPORT_ACT_QUANTIZERS,
+    SUPPORT_WT_QUANTIZERS,
+    KLQuantizer,
+    PerChannelAbsmaxQuantizer,
+)
 class PTQConfig:
...
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,12 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle
-import math
-import numpy as np
-from . import ptq_config
-from .ptq_registry import PTQRegistry
 def quant_forward_post_hook(layer, inputs, outputs):
     """
...
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,24 +13,14 @@
 # limitations under the License.
 import abc
-import copy
 import math
 import numpy as np
 import paddle
+from ...static.quantization.cal_kl_threshold import cal_kl_threshold
 from . import utils
-from ..cal_kl_threshold import cal_kl_threshold
-__all__ = [
-    'BaseQuantizer',
-    'AbsmaxQuantizer',
-    'PerChannelAbsmaxQuantizer',
-    'KLQuantizer',
-    'HistQuantizer',
-    'SUPPORT_ACT_QUANTIZERS',
-    'SUPPORT_WT_QUANTIZERS',
-]
 def abs_max_value(tensor):
...
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,8 +14,6 @@
 import paddle
-__all__ = ['PTQRegistry']
 class LayerInfo:
     """
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,35 +12,27 @@ ...@@ -12,35 +12,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import collections
import logging
import numpy as np
import sys
import os import os
import warnings
import paddle import paddle
import paddle.nn as nn
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid import dygraph, core, framework, unique_name
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.executor import Executor, global_scope from paddle.framework import core
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant from ...static.quantization.quantization_pass import (
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX QuantWeightPass,
from paddle.fluid.io import load_inference_model, save_inference_model ReplaceFakeQuantDequantPass,
from ..quantization_pass import ReplaceFakeQuantDequantPass, QuantWeightPass )
from paddle.fluid.log_helper import get_logger from ...static.quantization.utils import (
from .. import quantization_pass _get_input_name_index,
from ..utils import move_persistable_var_to_global_block _get_op_input_var_names,
from . import utils _get_output_name_index,
from . import fuse_utils move_persistable_var_to_global_block,
__all__ = ['ImperativeQuantAware']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
from . import fuse_utils, utils
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
def lazy_import_fleet(layer_name_map, fake_quant_input_layers): def lazy_import_fleet(layer_name_map, fake_quant_input_layers):
...@@ -147,7 +139,7 @@ class ImperativeQuantAware: ...@@ -147,7 +139,7 @@ class ImperativeQuantAware:
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ImperativeQuantAware import ImperativeQuantAware
from paddle.vision.models \ from paddle.vision.models \
import resnet import resnet
...@@ -178,7 +170,7 @@ class ImperativeQuantAware: ...@@ -178,7 +170,7 @@ class ImperativeQuantAware:
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ImperativeQuantAware import ImperativeQuantAware
class ImperativeModel(paddle.nn.Layer): class ImperativeModel(paddle.nn.Layer):
...@@ -256,7 +248,7 @@ class ImperativeQuantAware: ...@@ -256,7 +248,7 @@ class ImperativeQuantAware:
.. code-block:: python .. code-block:: python
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ImperativeQuantAware import ImperativeQuantAware
class ImperativeModel(paddle.nn.Layer): class ImperativeModel(paddle.nn.Layer):
...@@ -288,8 +280,8 @@ class ImperativeQuantAware: ...@@ -288,8 +280,8 @@ class ImperativeQuantAware:
imperative_qat.quantize(model) imperative_qat.quantize(model)
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
if self.fuse_conv_bn: if self.fuse_conv_bn:
fuse_utils.fuse_conv_bn(model) fuse_utils.fuse_conv_bn(model)
...@@ -376,7 +368,7 @@ class ImperativeQuantizeInputs: ...@@ -376,7 +368,7 @@ class ImperativeQuantizeInputs:
), "activation_bits should be 1, 2,... or 16." ), "activation_bits should be 1, 2,... or 16."
layer_check = lambda method: method is None or issubclass( layer_check = lambda method: method is None or issubclass(
method, dygraph.layers.Layer method, paddle.nn.Layer
) )
assert layer_check( assert layer_check(
weight_preprocess_layer weight_preprocess_layer
...@@ -417,13 +409,13 @@ class ImperativeQuantizeInputs: ...@@ -417,13 +409,13 @@ class ImperativeQuantizeInputs:
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
for name, cur_layer in model.named_sublayers(): for name, cur_layer in model.named_sublayers():
if not isinstance(cur_layer, self._quantizable_layer_type) or ( if not isinstance(cur_layer, self._quantizable_layer_type) or (
hasattr(cur_layer, "skip_quant") hasattr(cur_layer, "skip_quant")
and cur_layer.skip_quant == True and cur_layer.skip_quant is True
): ):
continue continue
...@@ -480,8 +472,8 @@ class ImperativeQuantizeOutputs: ...@@ -480,8 +472,8 @@ class ImperativeQuantizeOutputs:
None None
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
for cur_name, cur_layer in model.named_sublayers(): for cur_name, cur_layer in model.named_sublayers():
if '_act_preprocess' in cur_name: if '_act_preprocess' in cur_name:
...@@ -535,8 +527,8 @@ class ImperativeQuantizeOutputs: ...@@ -535,8 +527,8 @@ class ImperativeQuantizeOutputs:
None None
""" """
assert isinstance( assert isinstance(
model, dygraph.Layer model, paddle.nn.Layer
), "The model must be the instance of dygraph.Layer." ), "The model must be the instance of paddle.nn.Layer."
paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config) paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)
...@@ -546,8 +538,8 @@ class ImperativeQuantizeOutputs: ...@@ -546,8 +538,8 @@ class ImperativeQuantizeOutputs:
paddle.enable_static() paddle.enable_static()
place = core.CPUPlace() place = core.CPUPlace()
scope = global_scope() scope = paddle.static.global_scope()
exe = Executor(place) exe = paddle.static.Executor(place)
dirname = os.path.dirname(path) dirname = os.path.dirname(path)
basename = os.path.basename(path) basename = os.path.basename(path)
...@@ -558,8 +550,8 @@ class ImperativeQuantizeOutputs: ...@@ -558,8 +550,8 @@ class ImperativeQuantizeOutputs:
infer_program, infer_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = load_inference_model( ] = paddle.static.load_inference_model(
dirname=dirname, dirname,
executor=exe, executor=exe,
model_filename=model_filename, model_filename=model_filename,
params_filename=params_filename, params_filename=params_filename,
...@@ -600,14 +592,23 @@ class ImperativeQuantizeOutputs: ...@@ -600,14 +592,23 @@ class ImperativeQuantizeOutputs:
move_persistable_var_to_global_block(infer_program) move_persistable_var_to_global_block(infer_program)
save_inference_model( model_name = None
dirname=dirname, if model_filename is None:
feeded_var_names=feed_target_names, model_name = "model"
target_vars=fetch_targets, elif model_filename.endswith(".pdmodel"):
model_name = model_filename.rsplit(".", 1)[0]
else:
model_name = model_filename
path_prefix = os.path.join(dirname, model_name)
feed_vars = [
infer_program.global_block().var(name) for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets,
executor=exe, executor=exe,
main_program=infer_program.clone(), program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename,
clip_extra=clip_extra, clip_extra=clip_extra,
) )
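For context on the call above: `paddle.static.save_inference_model` takes a path prefix plus feed/fetch variables, replacing the old `dirname`/`feeded_var_names`/`target_vars` keywords. A minimal, self-contained sketch of that convention follows; the program, variable and path names here are illustrative only, not part of this PR.

```python
import paddle

paddle.enable_static()

exe = paddle.static.Executor(paddle.CPUPlace())
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    # A trivial network, only so there are feed/fetch variables to export.
    x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32')
    out = paddle.static.nn.fc(x, size=2)
exe.run(startup_prog)

# "<prefix>.pdmodel" and "<prefix>.pdiparams" are written side by side.
paddle.static.save_inference_model(
    "./quant_model/model",  # path prefix, not a directory
    [x],                    # feed_vars
    [out],                  # fetch_vars
    executor=exe,
    program=main_prog,
)
```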
...@@ -619,7 +620,7 @@ class ImperativeQuantizeOutputs: ...@@ -619,7 +620,7 @@ class ImperativeQuantizeOutputs:
Whether the layer needs to calculate output scales. Whether the layer needs to calculate output scales.
""" """
# exclude fake_quant ops in quant_layers file # exclude fake_quant ops in quant_layers file
if not isinstance(layer, dygraph.Layer): if not isinstance(layer, paddle.nn.Layer):
return False return False
if self._onnx_format: if self._onnx_format:
...@@ -660,7 +661,7 @@ class ImperativeQuantizeOutputs: ...@@ -660,7 +661,7 @@ class ImperativeQuantizeOutputs:
target_ops.append(op) target_ops.append(op)
for op in target_ops: for op in target_ops:
for in_var_name in utils._get_op_input_var_names(op): for in_var_name in _get_op_input_var_names(op):
previous_op = utils.find_previous_op(op.block, in_var_name) previous_op = utils.find_previous_op(op.block, in_var_name)
if previous_op is not None and ( if previous_op is not None and (
...@@ -670,9 +671,7 @@ class ImperativeQuantizeOutputs: ...@@ -670,9 +671,7 @@ class ImperativeQuantizeOutputs:
scale_name = previous_op.output('OutScale')[0] scale_name = previous_op.output('OutScale')[0]
in_scale = utils.load_variable_data(scope, scale_name) in_scale = utils.load_variable_data(scope, scale_name)
in_scale = utils.fp_numpy_to_naive(in_scale) in_scale = utils.fp_numpy_to_naive(in_scale)
argname, index = utils._get_input_name_index( argname, index = _get_input_name_index(op, in_var_name)
op, in_var_name
)
op._set_attr( op._set_attr(
argname + str(index) + "_threshold", in_scale argname + str(index) + "_threshold", in_scale
) )
...@@ -697,7 +696,7 @@ class ImperativeQuantizeOutputs: ...@@ -697,7 +696,7 @@ class ImperativeQuantizeOutputs:
out_scale = utils.fp_numpy_to_naive(out_scale) out_scale = utils.fp_numpy_to_naive(out_scale)
if previous_op.type != "feed": if previous_op.type != "feed":
res = utils._get_output_name_index(previous_op, in_var_name) res = _get_output_name_index(previous_op, in_var_name)
if res is not None: if res is not None:
argname, index = res argname, index = res
previous_op._set_attr( previous_op._set_attr(
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,19 +12,11 @@ ...@@ -12,19 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from ..utils import (
_get_op_input_var_names,
_get_op_output_var_names,
_get_output_name_index,
_get_input_name_index,
)
layer_name_map = { layer_name_map = {
'Conv2DTranspose': paddle.nn.Conv2DTranspose, 'Conv2DTranspose': paddle.nn.Conv2DTranspose,
'Conv2D': paddle.nn.Conv2D, 'Conv2D': paddle.nn.Conv2D,
...@@ -42,7 +34,6 @@ layer_name_map = { ...@@ -42,7 +34,6 @@ layer_name_map = {
'Softmax': paddle.nn.Softmax, 'Softmax': paddle.nn.Softmax,
'Swish': paddle.nn.Swish, 'Swish': paddle.nn.Swish,
'Tanh': paddle.nn.Tanh, 'Tanh': paddle.nn.Tanh,
'Hardswish': paddle.nn.Hardswish,
'BatchNorm': paddle.nn.BatchNorm, 'BatchNorm': paddle.nn.BatchNorm,
'GroupNorm': paddle.nn.GroupNorm, 'GroupNorm': paddle.nn.GroupNorm,
'LayerNorm': paddle.nn.LayerNorm, 'LayerNorm': paddle.nn.LayerNorm,
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,7 +12,42 @@ ...@@ -12,7 +12,42 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import quantize_transpiler import logging
from .quantize_transpiler import *
__all__ = quantize_transpiler.__all__
def get_logger(name, level, fmt=None):
"""
Get a logger from the logging module with the given name, level and format,
without calling logging.basicConfig; setting basicConfig inside paddle would
prevent a user's own basicConfig from taking effect after ``import paddle``.
Args:
name (str): The logger name.
level (int): The logging level of the logger, e.g. ``logging.INFO``
fmt (str): The format string for the logger output
Returns:
logging.Logger: logging logger with given settings
Examples:
.. code-block:: python
import paddle
import logging
logger = paddle.static.log_helper.get_logger(__name__, logging.INFO,
fmt='%(asctime)s-%(levelname)s: %(message)s')
"""
logger = logging.getLogger(name)
logger.setLevel(level)
handler = logging.StreamHandler()
if fmt:
formatter = logging.Formatter(fmt=fmt, datefmt='%a %b %d %H:%M:%S')
handler.setFormatter(formatter)
logger.addHandler(handler)
# Disable propagation to parent loggers, otherwise the same
# record may be printed multiple times.
logger.propagate = False
return logger
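A short usage sketch of this helper, matching how the quantization modules below construct their module-level logger (assuming the helper is importable as `paddle.static.log_helper` after this move):

```python
import logging

from paddle.static.log_helper import get_logger

_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
_logger.info("calibration pass started")  # emitted once; propagation is disabled
```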
...@@ -12,50 +12,55 @@ ...@@ -12,50 +12,55 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
QuantizationTransformPass, QuantizationTransformPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
QuantizationFreezePass, QuantizationFreezePass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
ConvertToInt8Pass, ConvertToInt8Pass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
TransformForMobilePass, TransformForMobilePass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
OutScaleForTrainingPass, OutScaleForTrainingPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
OutScaleForInferencePass, OutScaleForInferencePass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
AddQuantDequantPass, AddQuantDequantPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
ReplaceFakeQuantDequantPass, ReplaceFakeQuantDequantPass,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import QuantWeightPass from .quantization_pass import (
from ...fluid.contrib.slim.quantization.quantization_pass import ( QuantWeightPass,
)
from .quantization_pass import (
QuantizationTransformPassV2, QuantizationTransformPassV2,
) )
from ...fluid.contrib.slim.quantization.quantization_pass import ( from .quantization_pass import (
AddQuantDequantPassV2, AddQuantDequantPassV2,
) )
from ...fluid.contrib.slim.quantization.quant_int8_mkldnn_pass import ( from .quantization_pass import (
AddQuantDequantForInferencePass,
)
from .quant_int8_mkldnn_pass import (
QuantInt8MkldnnPass, QuantInt8MkldnnPass,
) )
from ...fluid.contrib.slim.quantization.quant2_int8_mkldnn_pass import ( from .quant2_int8_mkldnn_pass import (
Quant2Int8MkldnnPass, Quant2Int8MkldnnPass,
) )
from ...fluid.contrib.slim.quantization.post_training_quantization import ( from .post_training_quantization import (
PostTrainingQuantization, PostTrainingQuantization,
) )
from ...fluid.contrib.slim.quantization.post_training_quantization import ( from .post_training_quantization import (
PostTrainingQuantizationProgram, PostTrainingQuantizationProgram,
) )
from ...fluid.contrib.slim.quantization.post_training_quantization import ( from .post_training_quantization import (
WeightQuantization, WeightQuantization,
) )
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,25 +12,25 @@ ...@@ -12,25 +12,25 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import time
import sys
import logging import logging
import paddle import sys
import time
import numpy as np
import paddle import paddle
import paddle.fluid as fluid import paddle.static as static
from ....log_helper import get_logger from ..log_helper import get_logger
from .utils import ( from .utils import (
_channelwise_quant_axis1_ops,
bias_correction_w,
calculate_quant_cos_error,
dequant_tensor,
load_variable_data, load_variable_data,
quant_tensor,
set_variable_data, set_variable_data,
stable_sigmoid, stable_sigmoid,
quant_tensor,
dequant_tensor,
_channelwise_quant_axis1_ops,
calculate_quant_cos_error,
bias_correction_w,
) )
_logger = get_logger( _logger = get_logger(
...@@ -42,7 +42,7 @@ ZETA = 1.1 ...@@ -42,7 +42,7 @@ ZETA = 1.1
def compute_soft_rounding(alpha_v): def compute_soft_rounding(alpha_v):
return fluid.layers.clip( return paddle.clip(
paddle.nn.functional.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA, paddle.nn.functional.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA,
min=0, min=0,
max=1, max=1,
...@@ -83,11 +83,9 @@ class AdaRoundLoss: ...@@ -83,11 +83,9 @@ class AdaRoundLoss:
return round_loss return round_loss
round_loss = paddle.static.nn.cond( round_loss = static.nn.cond(
warm_start, warm_start,
lambda: fluid.layers.fill_constant( lambda: paddle.full(shape=[1], dtype='float32', fill_value=0.0),
shape=[1], dtype='float32', value=0.0
),
round_loss_fn, round_loss_fn,
) )
...@@ -151,7 +149,7 @@ class AdaRound: ...@@ -151,7 +149,7 @@ class AdaRound:
shape=alpha.shape, shape=alpha.shape,
dtype="float32", dtype="float32",
name=var_name + ".alpha", name=var_name + ".alpha",
default_initializer=fluid.initializer.NumpyArrayInitializer(alpha), default_initializer=paddle.nn.initializer.Assign(alpha),
) )
def _calculate_output_with_adarounded_weights( def _calculate_output_with_adarounded_weights(
...@@ -258,12 +256,12 @@ def run_adaround( ...@@ -258,12 +256,12 @@ def run_adaround(
fetch_op_name = quant_op_out_name fetch_op_name = quant_op_out_name
# build adaround program # build adaround program
exec_strategy = fluid.ExecutionStrategy() exec_strategy = static.ExecutionStrategy()
exec_strategy.num_iteration_per_drop_scope = 1 exec_strategy.num_iteration_per_drop_scope = 1
startup_program = fluid.Program() startup_program = static.Program()
train_program = fluid.Program() train_program = static.Program()
with fluid.program_guard(train_program, startup_program): with static.program_guard(train_program, startup_program):
with fluid.unique_name.guard(): with paddle.utils.unique_name.guard():
# initialize adaround # initialize adaround
adaround = AdaRound( adaround = AdaRound(
scale, scale,
...@@ -273,21 +271,21 @@ def run_adaround( ...@@ -273,21 +271,21 @@ def run_adaround(
weight_op_type=weight_op_type, weight_op_type=weight_op_type,
num_iterations=num_iterations, num_iterations=num_iterations,
) )
orig_out_tensor = fluid.data( orig_out_tensor = static.data(
name='orig_out_tensor', name='orig_out_tensor',
shape=fp32_fetch_list.shape, shape=(-1,) + fp32_fetch_list.shape,
dtype='float32', dtype='float32',
) )
adaround_out_tensor = fluid.data( adaround_out_tensor = static.data(
name='adaround_out_tensor', name='adaround_out_tensor',
shape=fp32_fetch_list.shape, shape=(-1,) + fp32_fetch_list.shape,
dtype='float32', dtype='float32',
) )
beta_tensor = fluid.data( beta_tensor = static.data(
name='beta', shape=[1], dtype='float32' name='beta', shape=[-1, 1], dtype='float32'
) )
warm_start_tensor = fluid.data( warm_start_tensor = static.data(
name='warm_start', shape=[1], dtype='bool' name='warm_start', shape=[-1, 1], dtype='bool'
) )
train_fetches_loss = adaround.get_loss( train_fetches_loss = adaround.get_loss(
...@@ -296,7 +294,7 @@ def run_adaround( ...@@ -296,7 +294,7 @@ def run_adaround(
adaround_out_tensor, adaround_out_tensor,
orig_out_tensor, orig_out_tensor,
) )
optimizer = fluid.optimizer.Adam(learning_rate=lr) optimizer = paddle.optimizer.Adam(learning_rate=lr)
loss = train_fetches_loss['loss'] loss = train_fetches_loss['loss']
optimizer.minimize(loss) optimizer.minimize(loss)
exe.run(startup_program) exe.run(startup_program)
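As a standalone reference for the `paddle.static` idioms this block now uses (program guards, `static.data` with a -1 batch dimension, and `paddle.optimizer.Adam` in a static program), here is a minimal hedged sketch; the network, names and shapes are placeholders, not the AdaRound graph itself.

```python
import numpy as np
import paddle
import paddle.static as static

paddle.enable_static()

startup_program = static.Program()
train_program = static.Program()
with static.program_guard(train_program, startup_program):
    with paddle.utils.unique_name.guard():
        # -1 marks a variable batch dimension, as in the tensors defined above.
        x = static.data(name='x', shape=[-1, 8], dtype='float32')
        y = static.data(name='y', shape=[-1, 1], dtype='float32')
        pred = static.nn.fc(x, size=1)
        loss = paddle.mean(paddle.nn.functional.square_error_cost(pred, y))
        paddle.optimizer.Adam(learning_rate=1e-3).minimize(loss)

exe = static.Executor(paddle.CPUPlace())
exe.run(startup_program)
(loss_val,) = exe.run(
    train_program,
    feed={
        'x': np.random.rand(4, 8).astype('float32'),
        'y': np.random.rand(4, 1).astype('float32'),
    },
    fetch_list=[loss],
)
```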
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,15 +14,15 @@ ...@@ -14,15 +14,15 @@
import logging import logging
import math import math
import numpy as np import numpy as np
from ....log_helper import get_logger
from ..log_helper import get_logger
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
__all__ = ['cal_kl_threshold']
def expand_quantized_bins(quantized_bins, reference_bins): def expand_quantized_bins(quantized_bins, reference_bins):
''' '''
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,43 +12,37 @@ ...@@ -12,43 +12,37 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import logging
import os import os
import re
import math
import shutil import shutil
import logging
import numpy as np import numpy as np
try: try:
from tqdm import tqdm from tqdm import tqdm
except: except:
from .utils import tqdm from .utils import tqdm
from inspect import isgeneratorfunction from inspect import isgeneratorfunction
from .... import io
from .... import core from paddle.fluid.framework import IrGraph, _get_var
from .... import reader
from .... import framework from ... import io, static
from .... import unique_name from ...fluid import reader
from ....executor import global_scope, Executor from ...framework import core
from ....framework import IrGraph from ...utils import unique_name
from ....log_helper import get_logger from ..log_helper import get_logger
from . import utils
from .adaround import run_adaround
from .cal_kl_threshold import cal_kl_threshold
from .quantization_pass import ( from .quantization_pass import (
AddQuantDequantPass,
AddQuantDequantPassV2,
QuantizationFreezePass,
QuantizationTransformPass, QuantizationTransformPass,
QuantizationTransformPassV2, QuantizationTransformPassV2,
QuantizationFreezePass,
QuantWeightPass, QuantWeightPass,
AddQuantDequantPass,
AddQuantDequantPassV2,
) )
from .cal_kl_threshold import cal_kl_threshold
from .adaround import run_adaround
from . import utils
__all__ = [
'PostTrainingQuantization',
'WeightQuantization',
'PostTrainingQuantizationProgram',
]
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -156,10 +150,10 @@ class PostTrainingQuantization: ...@@ -156,10 +150,10 @@ class PostTrainingQuantization:
Constructor. Constructor.
Args: Args:
executor(fluid.Executor): The executor to load, run and save the executor(static.Executor): The executor to load, run and save the
quantized model. quantized model.
scope(fluid.Scope, optional): The scope of the program, use it to load scope(static.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by global_scope(). and save variables. If scope=None, get scope by static.global_scope().
model_dir(str): The path of the fp32 model that will be quantized, model_dir(str): The path of the fp32 model that will be quantized,
and the model and params files are under the path. and the model and params files are under the path.
model_filename(str, optional): The name of file to load the inference model_filename(str, optional): The name of file to load the inference
...@@ -245,10 +239,10 @@ class PostTrainingQuantization: ...@@ -245,10 +239,10 @@ class PostTrainingQuantization:
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import paddle.static as static
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization from paddle.static.quantization import PostTrainingQuantization
exe = fluid.Executor(fluid.CPUPlace()) exe = static.Executor(paddle.CPUPlace())
model_dir = path/to/fp32_model_params model_dir = path/to/fp32_model_params
# set model_filename as None when the filename is __model__, # set model_filename as None when the filename is __model__,
# otherwise set it as the real filename # otherwise set it as the real filename
...@@ -344,7 +338,7 @@ class PostTrainingQuantization: ...@@ -344,7 +338,7 @@ class PostTrainingQuantization:
# Save input params # Save input params
self._bias_correction = bias_correction self._bias_correction = bias_correction
self._executor = executor self._executor = executor
self._scope = global_scope() if scope is None else scope self._scope = static.global_scope() if scope is None else scope
self._model_dir = model_dir self._model_dir = model_dir
self._model_filename = model_filename self._model_filename = model_filename
self._params_filename = params_filename self._params_filename = params_filename
...@@ -537,22 +531,29 @@ class PostTrainingQuantization: ...@@ -537,22 +531,29 @@ class PostTrainingQuantization:
Args: Args:
save_model_path(str): The path to save the quantized model. save_model_path(str): The path to save the quantized model.
model_filename(str, optional): If the model_filename is None, model_filename(str, optional): If the model_filename is None,
save the model to '__model__'. Otherwise, save the model save the model to 'model.pdmodel' and 'model.pdiparams'. Otherwise, save the model to 'model_name.pdmodel' and
to the specified filename. Default: None. 'model_name.pdiparams'. Default: None.
params_filename(str, optional): If the params_filename is None,
save params to separted files. Otherwise, save all params
to the specified filename.
Returns: Returns:
None None
''' '''
io.save_inference_model( model_name = None
dirname=save_model_path, if model_filename is None:
model_filename=model_filename, model_name = "model"
params_filename=params_filename, elif model_filename.endswith(".pdmodel"):
feeded_var_names=self._feed_list, model_name = model_filename.rsplit(".", 1)[0]
target_vars=self._fetch_list, else:
model_name = model_filename
path_prefix = os.path.join(save_model_path, model_name)
feed_vars = [
self._program.global_block().var(name) for name in self._feed_list
]
static.save_inference_model(
path_prefix,
feed_vars,
self._fetch_list,
executor=self._executor, executor=self._executor,
main_program=self._program, program=self._program,
clip_extra=self._clip_extra, clip_extra=self._clip_extra,
) )
_logger.info("The quantized model is saved in " + save_model_path) _logger.info("The quantized model is saved in " + save_model_path)
...@@ -567,8 +568,8 @@ class PostTrainingQuantization: ...@@ -567,8 +568,8 @@ class PostTrainingQuantization:
self._program, self._program,
self._feed_list, self._feed_list,
self._fetch_list, self._fetch_list,
] = io.load_inference_model( ] = static.load_inference_model(
dirname=self._model_dir, self._model_dir,
executor=self._executor, executor=self._executor,
model_filename=self._model_filename, model_filename=self._model_filename,
params_filename=self._params_filename, params_filename=self._params_filename,
...@@ -578,7 +579,7 @@ class PostTrainingQuantization: ...@@ -578,7 +579,7 @@ class PostTrainingQuantization:
self._optimize_fp32_model() self._optimize_fp32_model()
feed_vars = [ feed_vars = [
framework._get_var(str(var_name), self._program) _get_var(str(var_name), self._program)
for var_name in self._feed_list for var_name in self._feed_list
] ]
...@@ -1632,17 +1633,17 @@ class WeightQuantization: ...@@ -1632,17 +1633,17 @@ class WeightQuantization:
# Load model # Load model
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = static.Executor(place)
scope = global_scope() scope = static.global_scope()
[infer_program, feed_list, fetch_list] = io.load_inference_model( [infer_program, feed_list, fetch_list] = static.load_inference_model(
dirname=self._model_dir, self._model_dir,
executor=exe, executor=exe,
model_filename=self._model_filename, model_filename=self._model_filename,
params_filename=self._params_filename, params_filename=self._params_filename,
) )
# Clone and save fp16 weights # Clone and save fp16 weights
save_program = framework.Program() save_program = static.Program()
save_block = save_program.global_block() save_block = save_program.global_block()
save_var_map = {} save_var_map = {}
...@@ -1723,10 +1724,10 @@ class WeightQuantization: ...@@ -1723,10 +1724,10 @@ class WeightQuantization:
""" """
# Load model # Load model
place = core.CPUPlace() place = core.CPUPlace()
exe = Executor(place) exe = static.Executor(place)
scope = global_scope() scope = static.global_scope()
[program, feed_list, fetch_list] = io.load_inference_model( [program, feed_list, fetch_list] = static.load_inference_model(
dirname=self._model_dir, self._model_dir,
executor=exe, executor=exe,
model_filename=self._model_filename, model_filename=self._model_filename,
params_filename=self._params_filename, params_filename=self._params_filename,
...@@ -1758,15 +1759,22 @@ class WeightQuantization: ...@@ -1758,15 +1759,22 @@ class WeightQuantization:
self._weight_channel_wise_abs_max_quantization( self._weight_channel_wise_abs_max_quantization(
scope, place, weight_bits, op, var_name, for_test scope, place, weight_bits, op, var_name, for_test
) )
model_name = None
io.save_inference_model( if save_model_filename is None:
dirname=save_model_dir, model_name = "model"
feeded_var_names=feed_list, elif save_model_filename.endswith(".pdmodel"):
target_vars=fetch_list, model_name = save_model_filename.rsplit(".", 1)[0]
else:
model_name = save_model_filename
path_prefix = os.path.join(save_model_dir, model_name)
feed_vars = [program.global_block().var(name) for name in feed_list]
static.save_inference_model(
path_prefix,
feed_vars,
fetch_list,
executor=exe, executor=exe,
main_program=program, program=program,
model_filename=save_model_filename,
params_filename=save_params_filename,
) )
def _weight_abs_max_quantization( def _weight_abs_max_quantization(
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -13,11 +13,9 @@ ...@@ -13,11 +13,9 @@
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
from .... import core
from ....framework import IrGraph
from ....framework import _get_paddle_place
__all__ = ['Quant2Int8MkldnnPass'] from ...fluid.framework import IrGraph
from ...framework import _get_paddle_place, core
OpRole = core.op_proto_and_checker_maker.OpRole OpRole = core.op_proto_and_checker_maker.OpRole
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -13,12 +13,9 @@ ...@@ -13,12 +13,9 @@
# limitations under the License. # limitations under the License.
import numpy as np import numpy as np
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
from ....framework import _get_paddle_place
__all__ = ['QuantInt8MkldnnPass'] from ...fluid.framework import IrGraph
from ...framework import _get_paddle_place
class QuantInt8MkldnnPass: class QuantInt8MkldnnPass:
...@@ -40,23 +37,23 @@ class QuantInt8MkldnnPass: ...@@ -40,23 +37,23 @@ class QuantInt8MkldnnPass:
def __init__(self, _scope=None, _place=None): def __init__(self, _scope=None, _place=None):
r""" r"""
Args: Args:
scope(fluid.Scope): scope is used to initialize the new parameters. scope(static.Scope): scope is used to initialize the new parameters.
place(fluid.CPUPlace|str): place is used to initialize the new parameters. place(static.CPUPlace|str): place is used to initialize the new parameters.
When it is string, it can be only 'cpu'. When it is string, it can be only 'cpu'.
Examples: Examples:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle.fluid as fluid import paddle.static as static
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantInt8MkldnnPass import QuantInt8MkldnnPass
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = fluid.CPUPlace() place = static.CPUPlace()
mkldnn_pass = QuantInt8MkldnnPass(fluid.global_scope(), mkldnn_pass = QuantInt8MkldnnPass(static.global_scope(),
place) place)
mkldnn_pass.apply(graph) mkldnn_pass.apply(graph)
""" """
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -13,39 +13,21 @@ ...@@ -13,39 +13,21 @@
# limitations under the License. # limitations under the License.
import collections import collections
import numpy as np import numpy as np
try: try:
from tqdm import tqdm from tqdm import tqdm
except: except:
from .utils import tqdm from .utils import tqdm
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
from ....framework import Operator
from .... import unique_name
from ....framework import Program, program_guard, default_startup_program
from ....data import data
from ....executor import scope_guard
from ....framework import _get_paddle_place
from . import utils
import paddle import paddle
__all__ = [ from ...fluid.framework import IrGraph, IrNode
'QuantizationTransformPass', from ...framework import _get_paddle_place, core
'QuantizationFreezePass', from ...static import Program, data, program_guard, scope_guard
'ConvertToInt8Pass', from ...utils import unique_name
'TransformForMobilePass', from . import utils
'OutScaleForTrainingPass',
'OutScaleForInferencePass',
'AddQuantDequantPass',
'QuantizationTransformPassV2',
'AddQuantDequantPassV2',
'ReplaceFakeQuantDequantPass',
'QuantWeightPass',
'AddQuantDequantForInferencePass',
]
_fake_quant_op_list = [ _fake_quant_op_list = [
'fake_quantize_abs_max', 'fake_quantize_abs_max',
...@@ -137,10 +119,10 @@ class QuantizationTransformPass: ...@@ -137,10 +119,10 @@ class QuantizationTransformPass:
Constructor. Constructor.
Args: Args:
scope(fluid.Scope): When activation use 'range_abs_max' as the quantize scope(static.Scope): When activation use 'range_abs_max' as the quantize
type, this pass will create some new parameters. The scope is used to type, this pass will create some new parameters. The scope is used to
initialize these new parameters. initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to initialize new place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
parameters described above. If it's a string, it can be ``cpu`` or ``gpu:x``, parameters described above. If it's a string, it can be ``cpu`` or ``gpu:x``,
where ``x`` is the index of the GPUs. where ``x`` is the index of the GPUs.
weight_bits(int): quantization bit number for weights, weight_bits(int): quantization bit number for weights,
...@@ -197,15 +179,15 @@ class QuantizationTransformPass: ...@@ -197,15 +179,15 @@ class QuantizationTransformPass:
Examples: Examples:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle.fluid as fluid import paddle.static as static
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantizationTransformPass import QuantizationTransformPass
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = fluid.CPUPlace() place = paddle.CPUPlace()
transform_pass = QuantizationTransformPass(fluid.global_scope(), transform_pass = QuantizationTransformPass(static.global_scope(),
place) place)
transform_pass.apply(graph) transform_pass.apply(graph)
""" """
...@@ -1094,8 +1076,8 @@ class QuantizationFreezePass: ...@@ -1094,8 +1076,8 @@ class QuantizationFreezePass:
and weight will be scaled offline. and weight will be scaled offline.
Args: Args:
scope(fluid.Scope): scope is used to get the weight tensor values. scope(static.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the weight tensors. place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the weight tensors.
If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the index of the GPUs. If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the index of the GPUs.
bias_correction(bool): whether use bias correction for post-training quantization. bias_correction(bool): whether use bias correction for post-training quantization.
https://arxiv.org/abs/1810.05723. https://arxiv.org/abs/1810.05723.
...@@ -1190,7 +1172,7 @@ class QuantizationFreezePass: ...@@ -1190,7 +1172,7 @@ class QuantizationFreezePass:
) )
quantized_param_v = np.round(quantized_param_v) quantized_param_v = np.round(quantized_param_v)
# Weight bias correction # Weight bias correction
if self._bias_correction == True: if self._bias_correction is True:
quantized_param_v = utils.bias_correction_w( quantized_param_v = utils.bias_correction_w(
param_v, param_v,
quantized_param_v, quantized_param_v,
...@@ -1459,8 +1441,8 @@ class ConvertToInt8Pass: ...@@ -1459,8 +1441,8 @@ class ConvertToInt8Pass:
Convert the weights into int8_t type. Convert the weights into int8_t type.
Args: Args:
scope(fluid.Scope): scope is used to get the weight tensor values. scope(static.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the
8-bit weight tensors. If it's a string, it can be ``cpu`` or ``gpu:x``, 8-bit weight tensors. If it's a string, it can be ``cpu`` or ``gpu:x``,
where ``x`` is the index of the GPUs. where ``x`` is the index of the GPUs.
quantizable_op_type(list[str]): This input param will be removed later. The pass quantizable_op_type(list[str]): This input param will be removed later. The pass
...@@ -1602,8 +1584,8 @@ class OutScaleForTrainingPass: ...@@ -1602,8 +1584,8 @@ class OutScaleForTrainingPass:
These output scales may be used by tensorRT or some other inference engines. These output scales may be used by tensorRT or some other inference engines.
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): The place is used to initialize new parameters. place(static.CPUPlace|static.CUDAPlace|str): The place is used to initialize new parameters.
If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the
index of the GPUs. index of the GPUs.
moving_rate(float): The decay coefficient of moving average. The default value is 0.9. moving_rate(float): The decay coefficient of moving average. The default value is 0.9.
...@@ -1764,7 +1746,7 @@ class OutScaleForInferencePass: ...@@ -1764,7 +1746,7 @@ class OutScaleForInferencePass:
These output scales may be used by tensorRT or some other inference engines. These output scales may be used by tensorRT or some other inference engines.
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
""" """
self._scope = scope self._scope = scope
self._teller_set = utils.QUANT_SUPPORTED_OP_TYPE_LIST self._teller_set = utils.QUANT_SUPPORTED_OP_TYPE_LIST
...@@ -1856,8 +1838,8 @@ class AddQuantDequantPass: ...@@ -1856,8 +1838,8 @@ class AddQuantDequantPass:
Constructor. Constructor.
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to initialize new place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
parameters described above. If ``place`` is a string, it can be ``cpu`` parameters described above. If ``place`` is a string, it can be ``cpu``
or ``gpu:x``, where ``x`` is the index of the GPUs. or ``gpu:x``, where ``x`` is the index of the GPUs.
moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max' moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
...@@ -2452,12 +2434,12 @@ class QuantizationTransformPassV2(QuantizationTransformPass): ...@@ -2452,12 +2434,12 @@ class QuantizationTransformPassV2(QuantizationTransformPass):
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantizationTransformPassV2 import QuantizationTransformPassV2
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
transform_pass = QuantizationTransformPassV2(scope, place) transform_pass = QuantizationTransformPassV2(scope, place)
...@@ -2810,12 +2792,12 @@ class AddQuantDequantPassV2: ...@@ -2810,12 +2792,12 @@ class AddQuantDequantPassV2:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import AddQuantDequantPassV2 import AddQuantDequantPassV2
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
add_quant_dequant_pass = AddQuantDequantPassV2(scope, place) add_quant_dequant_pass = AddQuantDequantPassV2(scope, place)
...@@ -2977,12 +2959,12 @@ class ReplaceFakeQuantDequantPass: ...@@ -2977,12 +2959,12 @@ class ReplaceFakeQuantDequantPass:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import ReplaceFakeQuantDequantPass import ReplaceFakeQuantDequantPass
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
replace_pass = ReplaceFakeQuantDequantPass(scope, place) replace_pass = ReplaceFakeQuantDequantPass(scope, place)
...@@ -3133,12 +3115,12 @@ class QuantWeightPass: ...@@ -3133,12 +3115,12 @@ class QuantWeightPass:
.. code-block:: python .. code-block:: python
# The original graph will be rewritten. # The original graph will be rewritten.
import paddle import paddle
from paddle.fluid.contrib.slim.quantization \ from paddle.static.quantization \
import QuantWeightPass import QuantWeightPass
from paddle.fluid.contrib.slim.graph import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False) graph = IrGraph(core.Graph(paddle.static.Program().desc), for_test=False)
place = paddle.CPUPlace() place = paddle.CPUPlace()
scope = paddle.static.global_scope() scope = paddle.static.global_scope()
quant_weight_pass = QuantWeightPass(scope, place) quant_weight_pass = QuantWeightPass(scope, place)
...@@ -3207,7 +3189,7 @@ class QuantWeightPass: ...@@ -3207,7 +3189,7 @@ class QuantWeightPass:
bits_length, bits_length,
onnx_format=True, onnx_format=True,
) )
if self._bias_correction == True: if self._bias_correction is True:
quantized_param_v = utils.bias_correction_w( quantized_param_v = utils.bias_correction_w(
param_v, param_v,
quantized_param_v, quantized_param_v,
...@@ -3264,7 +3246,7 @@ class AddQuantDequantForInferencePass: ...@@ -3264,7 +3246,7 @@ class AddQuantDequantForInferencePass:
def __init__(self, scope, place, quant_bits=8): def __init__(self, scope, place, quant_bits=8):
""" """
Args: Args:
scope(fluid.Scope): The scope is used to initialize these new parameters. scope(static.Scope): The scope is used to initialize these new parameters.
place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors. place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs. If it's string, it can be ``cpu``, and ``gpu:x``, where ``x`` is the index of the GPUs.
quant_bits(int, optional): quantization bit number for weight. Default is 8. quant_bits(int, optional): quantization bit number for weight. Default is 8.
......
...@@ -250,7 +250,6 @@ if(WIN32) ...@@ -250,7 +250,6 @@ if(WIN32)
list(REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model) list(REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model)
list(REMOVE_ITEM TEST_OPS test_imperative_ptq) list(REMOVE_ITEM TEST_OPS test_imperative_ptq)
list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1) list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1)
list(REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp) list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_lsq) list(REMOVE_ITEM TEST_OPS test_imperative_qat_lsq)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_matmul) list(REMOVE_ITEM TEST_OPS test_imperative_qat_matmul)
......
...@@ -91,17 +91,18 @@ Having gathered all the data needed for quantization we apply the `cpu_quantize_ ...@@ -91,17 +91,18 @@ Having gathered all the data needed for quantization we apply the `cpu_quantize_
The code snippet shows how the `Quant2Int8MkldnnPass` can be applied to a model graph: The code snippet shows how the `Quant2Int8MkldnnPass` can be applied to a model graph:
```python ```python
import paddle.fluid as fluid import paddle
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass import paddle.static as static
from paddle.static.quantization import Quant2Int8MkldnnPass
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
# Create the IrGraph by Program # Create the IrGraph by Program
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False) graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = fluid.CPUPlace() place = paddle.CPUPlace()
# Convert the IrGraph to MKL-DNN supported INT8 IrGraph using the # Convert the IrGraph to MKL-DNN supported INT8 IrGraph using the
# Quant2Int8MkldnnPass. It requires a list of operators to be quantized # Quant2Int8MkldnnPass. It requires a list of operators to be quantized
mkldnn_pass = Quant2Int8MkldnnPass({'conv2d', 'pool2d'}, fluid.global_scope(), place, fluid.core, False) mkldnn_pass = Quant2Int8MkldnnPass({'conv2d', 'pool2d'}, static.global_scope(), place, core, False)
# Apply Quant2Int8MkldnnPass to IrGraph # Apply Quant2Int8MkldnnPass to IrGraph
mkldnn_pass.apply(graph) mkldnn_pass.apply(graph)
...@@ -263,7 +264,7 @@ The following options are also accepted: ...@@ -263,7 +264,7 @@ The following options are also accepted:
```bash ```bash
cd /PATH/TO/PADDLE cd /PATH/TO/PADDLE
OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py --quant_model=/PATH/TO/DOWNLOADED/QUANT/MODEL --fp32_model=/PATH/TO/DOWNLOADED/FP32/MODEL --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.01 --ops_to_quantize="conv2d,pool2d" OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/static/quantization/slim/tests/quant2_int8_image_classification_comparison.py --quant_model=/PATH/TO/DOWNLOADED/QUANT/MODEL --fp32_model=/PATH/TO/DOWNLOADED/FP32/MODEL --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.01 --ops_to_quantize="conv2d,pool2d"
``` ```
> Notes: Due to the large number of images in the `int8_full_val.bin` dataset (50 000), the accuracy benchmark may take a long time. To speed up accuracy measurement, it is recommended to set `OMP_NUM_THREADS` to the maximum number of physical cores available on the server. > Notes: Due to the large number of images in the `int8_full_val.bin` dataset (50 000), the accuracy benchmark may take a long time. To speed up accuracy measurement, it is recommended to set `OMP_NUM_THREADS` to the maximum number of physical cores available on the server.
...@@ -276,7 +277,7 @@ To reproduce the performance results, the environment variable `OMP_NUM_THREADS= ...@@ -276,7 +277,7 @@ To reproduce the performance results, the environment variable `OMP_NUM_THREADS=
```bash ```bash
cd /PATH/TO/PADDLE/build cd /PATH/TO/PADDLE/build
python ../python/paddle/fluid/contrib/slim/tests/save_quant_model.py --quant_model_path=/PATH/TO/DOWNLOADED/QUANT/MODEL --int8_model_save_path=/PATH/TO/SAVE/QUANT/INT8/MODEL --ops_to_quantize="conv2d,pool2d" python ../python/paddle/static/quantization/slim/tests/save_quant_model.py --quant_model_path=/PATH/TO/DOWNLOADED/QUANT/MODEL --int8_model_save_path=/PATH/TO/SAVE/QUANT/INT8/MODEL --ops_to_quantize="conv2d,pool2d"
``` ```
2. Run the C-API test for performance benchmark. 2. Run the C-API test for performance benchmark.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# copyright (c) 2020 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest import argparse
import os import os
import sys import sys
import argparse import unittest
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
import paddle import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core
paddle.enable_static() paddle.enable_static()
...@@ -47,29 +47,32 @@ def parse_args(): ...@@ -47,29 +47,32 @@ def parse_args():
def generate_dot_for_model(model_path, save_graph_dir, save_graph_name): def generate_dot_for_model(model_path, save_graph_dir, save_graph_name):
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
if not os.path.exists(save_graph_dir): if not os.path.exists(save_graph_dir):
os.makedirs(save_graph_dir) os.makedirs(save_graph_dir)
model_name = os.path.basename(os.path.normpath(save_graph_dir)) model_name = os.path.basename(os.path.normpath(save_graph_dir))
if save_graph_name is '': if save_graph_name == '':
save_graph_name = model_name save_graph_name = model_name
graph.draw(save_graph_dir, save_graph_name, graph.all_op_nodes()) graph.draw(save_graph_dir, save_graph_name, graph.all_op_nodes())
print( print(
......
...@@ -11,18 +11,27 @@ ...@@ -11,18 +11,27 @@
# without warranties or conditions of any kind, either express or implied. # without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import numpy as np
import logging import logging
import paddle import numpy as np
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.nn import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.nn import BatchNorm1D
from paddle.fluid.log_helper import get_logger import paddle
from paddle.framework import ParamAttr
from paddle.nn import (
BatchNorm1D,
BatchNorm2D,
Conv2D,
LeakyReLU,
Linear,
MaxPool2D,
PReLU,
ReLU,
ReLU6,
Sequential,
Sigmoid,
Softmax,
)
from paddle.static.log_helper import get_logger
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -86,18 +95,18 @@ def train_lenet(lenet, reader, optimizer): ...@@ -86,18 +95,18 @@ def train_lenet(lenet, reader, optimizer):
return loss_list return loss_list
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -155,26 +164,26 @@ class ImperativeLenet(fluid.dygraph.Layer): ...@@ -155,26 +164,26 @@ class ImperativeLenet(fluid.dygraph.Layer):
x = self.quant_stub(inputs) x = self.quant_stub(inputs)
x = self.features(x) x = self.features(x)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.add(x, paddle.to_tensor(0.0)) # For CI x = self.add(x, paddle.to_tensor(0.0)) # For CI
x = self.fc(x) x = self.fc(x)
return x return x
class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): class ImperativeLenetWithSkipQuant(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") conv2d_b1_attr = ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = ParamAttr(name="fc_b_3")
self.conv2d_0 = Conv2D( self.conv2d_0 = Conv2D(
in_channels=1, in_channels=1,
out_channels=6, out_channels=6,
...@@ -240,8 +249,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): ...@@ -240,8 +249,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
x = self.relu6_0(x) x = self.relu6_0(x)
x = self.pool2d_1(x) x = self.pool2d_1(x)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.linear_0(x) x = self.linear_0(x)
x = self.leaky_relu_0(x) x = self.leaky_relu_0(x)
x = self.linear_1(x) x = self.linear_1(x)
...@@ -252,7 +260,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): ...@@ -252,7 +260,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
return x return x
class ImperativeLinearBn(fluid.dygraph.Layer): class ImperativeLinearBn(paddle.nn.Layer):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
...@@ -284,7 +292,7 @@ class ImperativeLinearBn(fluid.dygraph.Layer): ...@@ -284,7 +292,7 @@ class ImperativeLinearBn(fluid.dygraph.Layer):
return x return x
class ImperativeLinearBn_hook(fluid.dygraph.Layer): class ImperativeLinearBn_hook(paddle.nn.Layer):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
......
...@@ -12,19 +12,20 @@ ...@@ -12,19 +12,20 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest
import os
import sys
import argparse import argparse
import logging import logging
import os
import struct import struct
import numpy as np import sys
import time import time
import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -185,23 +186,26 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -185,23 +186,26 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase):
target='quant', target='quant',
): ):
assert target in ['quant', 'int8', 'fp32'] assert target in ['quant', 'int8', 'fp32']
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
...@@ -359,7 +363,7 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -359,7 +363,7 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase):
return set(map(int, string.split(','))) return set(map(int, string.split(',')))
def test_graph_transformation(self): def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
quant_model_path = test_case_args.quant_model quant_model_path = test_case_args.quant_model
......
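As an aside for readers following the migration: the model-loading pattern that replaces fluid.io in this test (and in the NLP and INT8 comparison tests further down) can be sketched in isolation roughly as below. The 'model'/'params' filenames and the load_model_as_graph helper name are placeholders taken from the test's own assumptions, not part of the suite.

import os

import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core

paddle.enable_static()


def load_model_as_graph(model_path):
    # Placeholder helper mirroring the rewritten test: load an inference model
    # with the paddle.static API and wrap its Program in an IrGraph so the
    # MKL-DNN quantization passes can run on it.
    place = paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    scope = paddle.static.global_scope()
    with paddle.static.scope_guard(scope):
        if os.path.exists(os.path.join(model_path, '__model__')):
            # Old-format model directory: keep using the legacy fluid loader.
            [program, feed_names, fetch_targets] = paddle.fluid.io.load_inference_model(
                model_path, exe
            )
        else:
            [program, feed_names, fetch_targets] = paddle.static.load_inference_model(
                model_path,
                exe,
                model_filename='model',
                params_filename='params',
            )
    graph = IrGraph(core.Graph(program.desc), for_test=True)
    return graph, feed_names, fetch_targets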
...@@ -13,15 +13,17 @@ ...@@ -13,15 +13,17 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
import numpy as np
import struct import struct
import sys import sys
import time import time
import unittest import unittest
from paddle import fluid
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor import numpy as np
from save_quant_model import transform_and_save_int8_model from save_quant_model import transform_and_save_int8_model
import paddle
from paddle.framework import core
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
...@@ -80,17 +82,19 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -80,17 +82,19 @@ class TestLstmModelPTQ(unittest.TestCase):
[len(feat) // 4 // 8, 8] [len(feat) // 4 // 8, 8]
) )
lod_feat = [feat.shape[0]] lod_feat = [feat.shape[0]]
minputs = fluid.create_lod_tensor(feat, [lod_feat], place) minputs = paddle.fluid.create_lod_tensor(
feat, [lod_feat], place
)
infer_data = fluid.core.PaddleTensor() infer_data = core.PaddleTensor()
infer_data.lod = minputs.lod() infer_data.lod = minputs.lod()
infer_data.data = fluid.core.PaddleBuf(np.array(minputs)) infer_data.data = core.PaddleBuf(np.array(minputs))
infer_data.shape = minputs.shape() infer_data.shape = minputs.shape()
infer_data.dtype = fluid.core.PaddleDType.FLOAT32 infer_data.dtype = core.PaddleDType.FLOAT32
infer_label = fluid.core.PaddleTensor() infer_label = core.PaddleTensor()
infer_label.data = fluid.core.PaddleBuf(np.array(label)) infer_label.data = core.PaddleBuf(np.array(label))
infer_label.shape = label.shape infer_label.shape = label.shape
infer_label.dtype = fluid.core.PaddleDType.INT32 infer_label.dtype = core.PaddleDType.INT32
data.append([infer_data, infer_label]) data.append([infer_data, infer_label])
warmup_data = data[:1] warmup_data = data[:1]
inputs = data[1:] inputs = data[1:]
...@@ -105,7 +109,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -105,7 +109,7 @@ class TestLstmModelPTQ(unittest.TestCase):
use_analysis=False, use_analysis=False,
enable_ptq=False, enable_ptq=False,
): ):
config = AnalysisConfig(model_path) config = core.AnalysisConfig(model_path)
config.set_cpu_math_library_num_threads(num_threads) config.set_cpu_math_library_num_threads(num_threads)
if use_analysis: if use_analysis:
config.disable_gpu() config.disable_gpu()
...@@ -132,7 +136,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -132,7 +136,7 @@ class TestLstmModelPTQ(unittest.TestCase):
use_analysis=False, use_analysis=False,
enable_ptq=False, enable_ptq=False,
): ):
place = fluid.CPUPlace() place = paddle.CPUPlace()
warmup_data, inputs = self.get_warmup_tensor(data_path, place) warmup_data, inputs = self.get_warmup_tensor(data_path, place)
warmup_data = [item[0] for item in warmup_data] warmup_data = [item[0] for item in warmup_data]
config = self.set_config( config = self.set_config(
...@@ -144,7 +148,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -144,7 +148,7 @@ class TestLstmModelPTQ(unittest.TestCase):
enable_ptq, enable_ptq,
) )
predictor = create_paddle_predictor(config) predictor = core.create_paddle_predictor(config)
data = [item[0] for item in inputs] data = [item[0] for item in inputs]
label = np.array([item[1] for item in inputs]) label = np.array([item[1] for item in inputs])
...@@ -197,7 +201,7 @@ class TestLstmModelPTQ(unittest.TestCase): ...@@ -197,7 +201,7 @@ class TestLstmModelPTQ(unittest.TestCase):
return hx_acc, ctc_acc, fps return hx_acc, ctc_acc, fps
def test_lstm_model(self): def test_lstm_model(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
fp32_model = test_case_args.fp32_model fp32_model = test_case_args.fp32_model
......
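The predictor setup in the LSTM test above now draws everything from paddle.framework.core instead of fluid.core; a minimal sketch of that pattern, using only the AnalysisConfig/PaddleTensor calls that appear in the test (make_predictor and to_paddle_tensor are illustrative names only):

import paddle
from paddle.framework import core


def make_predictor(model_path, num_threads=1):
    # core.AnalysisConfig / core.create_paddle_predictor replace the old
    # fluid.core entry points used before the rewrite.
    config = core.AnalysisConfig(model_path)
    config.set_cpu_math_library_num_threads(num_threads)
    config.disable_gpu()
    return core.create_paddle_predictor(config)


def to_paddle_tensor(array):
    # Wrap a float32 numpy array in the PaddleTensor structure, as the test
    # does when it builds its warm-up batch.
    tensor = core.PaddleTensor()
    tensor.data = core.PaddleBuf(array)
    tensor.shape = list(array.shape)
    tensor.dtype = core.PaddleDType.FLOAT32
    return tensor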
...@@ -12,18 +12,19 @@ ...@@ -12,18 +12,19 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest
import os
import sys
import argparse import argparse
import logging import logging
import numpy as np import os
import sys
import time import time
import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -158,23 +159,26 @@ class QuantInt8NLPComparisonTest(unittest.TestCase): ...@@ -158,23 +159,26 @@ class QuantInt8NLPComparisonTest(unittest.TestCase):
target='quant', target='quant',
): ):
assert target in ['quant', 'int8', 'fp32'] assert target in ['quant', 'int8', 'fp32']
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
...@@ -296,7 +300,7 @@ class QuantInt8NLPComparisonTest(unittest.TestCase): ...@@ -296,7 +300,7 @@ class QuantInt8NLPComparisonTest(unittest.TestCase):
return set(map(int, string.split(','))) return set(map(int, string.split(',')))
def test_graph_transformation(self): def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
quant_model_path = test_case_args.quant_model quant_model_path = test_case_args.quant_model
......
...@@ -12,19 +12,20 @@ ...@@ -12,19 +12,20 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest
import os
import sys
import argparse import argparse
import logging import logging
import os
import struct import struct
import numpy as np import sys
import time import time
import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantInt8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import QuantInt8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -163,23 +164,26 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -163,23 +164,26 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase):
skip_batch_num=0, skip_batch_num=0,
transform_to_int8=False, transform_to_int8=False,
): ):
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')): if os.path.exists(os.path.join(model_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(model_path, exe) ] = paddle.fluid.io.load_inference_model(model_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
model_path, exe, 'model', 'params' model_path,
exe,
model_filename='model',
params_filename='params',
) )
graph = IrGraph(core.Graph(inference_program.desc), for_test=True) graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
...@@ -298,7 +302,7 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase): ...@@ -298,7 +302,7 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase):
assert fp32_acc1 - int8_acc1 <= threshold assert fp32_acc1 - int8_acc1 <= threshold
def test_graph_transformation(self): def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn(): if not core.is_compiled_with_mkldnn():
return return
quant_model_path = test_case_args.quant_model quant_model_path = test_case_args.quant_model
......
...@@ -12,15 +12,15 @@ ...@@ -12,15 +12,15 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import unittest import argparse
import os import os
import sys import sys
import argparse import unittest
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass from paddle.framework import core
from paddle.fluid import core from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static() paddle.enable_static()
...@@ -93,35 +93,41 @@ def transform_and_save_int8_model( ...@@ -93,35 +93,41 @@ def transform_and_save_int8_model(
debug=False, debug=False,
quant_model_filename='', quant_model_filename='',
quant_params_filename='', quant_params_filename='',
save_model_filename="__model__", save_model_filename="model",
save_params_filename=None, save_params_filename=None,
): ):
place = fluid.CPUPlace() place = paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
inference_scope = fluid.executor.global_scope() inference_scope = paddle.static.global_scope()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
if not quant_model_filename: if not quant_model_filename:
if os.path.exists(os.path.join(original_path, '__model__')): if os.path.exists(os.path.join(original_path, '__model__')):
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model(original_path, exe) ] = paddle.fluid.io.load_inference_model(original_path, exe)
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
original_path, exe, 'model', 'params' original_path,
exe,
model_filename='model',
params_filename='params',
) )
else: else:
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
original_path, exe, quant_model_filename, quant_params_filename original_path,
exe,
model_filename=quant_model_filename,
params_filename=quant_params_filename,
) )
ops_to_quantize_set = set() ops_to_quantize_set = set()
...@@ -147,15 +153,18 @@ def transform_and_save_int8_model( ...@@ -147,15 +153,18 @@ def transform_and_save_int8_model(
) )
graph = transform_to_mkldnn_int8_pass.apply(graph) graph = transform_to_mkldnn_int8_pass.apply(graph)
inference_program = graph.to_program() inference_program = graph.to_program()
with fluid.scope_guard(inference_scope): with paddle.static.scope_guard(inference_scope):
fluid.io.save_inference_model( path_prefix = os.path.join(save_path, save_model_filename)
save_path, feed_vars = [
feed_target_names, inference_program.global_block().var(name)
for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets, fetch_targets,
exe, executor=exe,
inference_program, program=inference_program,
model_filename=save_model_filename,
params_filename=save_params_filename,
) )
print( print(
"Success! INT8 model obtained from the Quant model can be found at {}\n".format( "Success! INT8 model obtained from the Quant model can be found at {}\n".format(
......
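The save path of the script above changes shape as well: paddle.static.save_inference_model expects a path prefix plus feed/fetch Variables instead of a directory plus name lists. A condensed sketch of just that step (save_int8_model is an illustrative wrapper, not part of the script):

import os

import paddle


def save_int8_model(save_path, save_model_filename, program,
                    feed_target_names, fetch_targets, exe):
    # Resolve the feed names recorded at load time back into Variables of the
    # transformed program, then save with the 2.x static API.
    path_prefix = os.path.join(save_path, save_model_filename)
    feed_vars = [
        program.global_block().var(name) for name in feed_target_names
    ]
    paddle.static.save_inference_model(
        path_prefix,
        feed_vars,
        fetch_targets,
        executor=exe,
        program=program,
    )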
...@@ -13,12 +13,13 @@ ...@@ -13,12 +13,13 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import unittest import unittest
import numpy as np
import paddle import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph from paddle.fluid.framework import IrGraph
from paddle.fluid import core from paddle.framework import core
paddle.enable_static() paddle.enable_static()
...@@ -27,63 +28,68 @@ os.environ["CPU_NUM"] = "1" ...@@ -27,63 +28,68 @@ os.environ["CPU_NUM"] = "1"
def conv_block(): def conv_block():
img = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32') img = paddle.static.data(
label = fluid.layers.data(name='label', shape=[1], dtype='int64') name='image', shape=[-1, 1, 28, 28], dtype='float32'
conv_pool_1 = fluid.nets.simple_img_conv_pool( )
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
conv_out_1 = paddle.static.nn.conv2d(
input=img, input=img,
filter_size=5, filter_size=5,
num_filters=20, num_filters=20,
pool_size=2, act='relu',
pool_stride=2, )
act="relu", conv_pool_1 = paddle.nn.functional.max_pool2d(
conv_out_1, kernel_size=2, stride=2
) )
conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1) conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
conv_out_2 = paddle.static.nn.conv2d(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
num_filters=50, num_filters=20,
pool_size=2, act='relu',
pool_stride=2, )
act="relu", conv_pool_2 = paddle.nn.functional.max_pool2d(
conv_out_2, kernel_size=2, stride=2
) )
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') prediction = paddle.static.nn.fc(
loss = paddle.nn.functional.cross_entropy( x=conv_pool_2, size=10, activation='softmax'
input=prediction, label=label, reduction='none', use_softmax=False
) )
loss = paddle.nn.functional.cross_entropy(input=prediction, label=label)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
return [img, label], avg_loss return [img, label], avg_loss
class TestGraph(unittest.TestCase): class TestGraph(unittest.TestCase):
def graph_apis(self, use_cuda=False, for_ci=True): def graph_apis(self, use_cuda=False, for_ci=True):
main = fluid.Program() main = paddle.static.Program()
startup = fluid.Program() startup = paddle.static.Program()
with fluid.unique_name.guard(): with paddle.utils.unique_name.guard():
with fluid.program_guard(main, startup): with paddle.static.program_guard(main, startup):
feeds, loss = conv_block() feeds, loss = conv_block()
opt = fluid.optimizer.Adam(learning_rate=0.001) opt = paddle.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss) opt.minimize(loss)
graph = IrGraph(core.Graph(main.desc), for_test=False) graph = IrGraph(core.Graph(main.desc), for_test=False)
backup_graph = graph.clone() backup_graph = graph.clone()
self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes())) self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes()))
build_strategy = fluid.BuildStrategy() build_strategy = paddle.static.BuildStrategy()
build_strategy.memory_optimize = False build_strategy.memory_optimize = False
build_strategy.enable_inplace = False build_strategy.enable_inplace = False
origin_binary = fluid.CompiledProgram(graph.graph).with_data_parallel( origin_binary = paddle.static.CompiledProgram(
loss_name=loss.name, build_strategy=build_strategy graph.graph
) ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
backup_binary = fluid.CompiledProgram( backup_binary = paddle.static.CompiledProgram(
backup_graph.graph backup_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy) ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
exe.run(startup) exe.run(startup)
iters = 5 iters = 5
batch_size = 8 batch_size = 8
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size paddle.dataset.mnist.train(), batch_size=batch_size
) )
feeder = fluid.DataFeeder(feed_list=feeds, place=place) feeder = paddle.fluid.DataFeeder(feed_list=feeds, place=place)
def _train(binary): def _train(binary):
for _ in range(iters): for _ in range(iters):
...@@ -105,17 +111,29 @@ class TestGraph(unittest.TestCase): ...@@ -105,17 +111,29 @@ class TestGraph(unittest.TestCase):
var.set(var_array, place) var.set(var_array, place)
sum_before = np.sum( sum_before = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()) np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
) )
fluid.io._save_persistable_nodes(exe, checkponit_dir, graph) )
_set_zero('conv2d_1.w_0', fluid.global_scope(), place) paddle.fluid.io._save_persistable_nodes(exe, checkponit_dir, graph)
_set_zero('conv2d_1.w_0', paddle.static.global_scope(), place)
set_after = np.sum( set_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()) np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
)
) )
self.assertEqual(set_after, 0) self.assertEqual(set_after, 0)
fluid.io._load_persistable_nodes(exe, checkponit_dir, graph) paddle.fluid.io._load_persistable_nodes(exe, checkponit_dir, graph)
sum_after = np.sum( sum_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()) np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
)
) )
self.assertEqual(sum_before, sum_after) self.assertEqual(sum_before, sum_after)
...@@ -144,7 +162,7 @@ class TestGraph(unittest.TestCase): ...@@ -144,7 +162,7 @@ class TestGraph(unittest.TestCase):
self.graph_apis(use_cuda=False, for_ci=True) self.graph_apis(use_cuda=False, for_ci=True)
def test_graph_apis_cuda(self): def test_graph_apis_cuda(self):
if fluid.core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
self.graph_apis(use_cuda=True, for_ci=True) self.graph_apis(use_cuda=True, for_ci=True)
......
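For orientation, the graph round trip exercised by this test can be reduced to a few lines under the new API; the tiny network below is an arbitrary stand-in for conv_block, and the assertion is the same clone check the test makes.

import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core

paddle.enable_static()


def build_and_clone_graph():
    # Build a trivial static program, wrap it in an IrGraph, and clone it.
    main = paddle.static.Program()
    startup = paddle.static.Program()
    with paddle.utils.unique_name.guard():
        with paddle.static.program_guard(main, startup):
            img = paddle.static.data(
                name='image', shape=[-1, 1, 28, 28], dtype='float32'
            )
            hidden = paddle.static.nn.fc(x=img, size=10)
            loss = paddle.mean(hidden)
            paddle.optimizer.Adam(learning_rate=0.001).minimize(loss)
    graph = IrGraph(core.Graph(main.desc), for_test=False)
    backup_graph = graph.clone()
    assert len(graph.all_nodes()) == len(backup_graph.all_nodes())
    return graph, backup_graph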
...@@ -13,38 +13,31 @@ ...@@ -13,38 +13,31 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest
import logging
import warnings
import tempfile import tempfile
import unittest
import numpy as np
from imperative_test_utils import fix_model_dict, train_lenet
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers from paddle.framework import core, set_flags
from paddle.fluid import core from paddle.nn import (
from paddle.fluid.optimizer import AdamOptimizer BatchNorm2D,
from paddle.fluid.framework import IrGraph Conv2D,
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware Linear,
from paddle.nn import Sequential MaxPool2D,
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX Sequential,
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, PReLU Softmax,
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D )
from paddle.fluid.log_helper import get_logger from paddle.nn.layer import LeakyReLU, PReLU, ReLU, Sigmoid
from paddle.fluid.dygraph import nn from paddle.quantization import ImperativeQuantAware
from imperative_test_utils import fix_model_dict, train_lenet
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
def get_vaild_warning_num(warning, w): def get_vaild_warning_num(warning, w):
...@@ -55,18 +48,18 @@ def get_vaild_warning_num(warning, w): ...@@ -55,18 +48,18 @@ def get_vaild_warning_num(warning, w):
return num return num
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -121,7 +114,7 @@ class ImperativeLenet(fluid.dygraph.Layer): ...@@ -121,7 +114,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
def forward(self, inputs): def forward(self, inputs):
x = self.features(inputs) x = self.features(inputs)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.fc(x) x = self.fc(x)
return x return x
...@@ -152,8 +145,8 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -152,8 +145,8 @@ class TestImperativeOutSclae(unittest.TestCase):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
...@@ -162,8 +155,8 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -162,8 +155,8 @@ class TestImperativeOutSclae(unittest.TestCase):
reader = paddle.batch( reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True paddle.dataset.mnist.test(), batch_size=32, drop_last=True
) )
adam = AdamOptimizer( adam = paddle.optimizer.Adam(
learning_rate=lr, parameter_list=lenet.parameters() learning_rate=lr, parameters=lenet.parameters()
) )
loss_list = train_lenet(lenet, reader, adam) loss_list = train_lenet(lenet, reader, adam)
lenet.eval() lenet.eval()
...@@ -186,8 +179,8 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -186,8 +179,8 @@ class TestImperativeOutSclae(unittest.TestCase):
reader = paddle.batch( reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True paddle.dataset.mnist.test(), batch_size=32, drop_last=True
) )
adam = AdamOptimizer( adam = paddle.optimizer.Adam(
learning_rate=lr, parameter_list=lenet.parameters() learning_rate=lr, parameters=lenet.parameters()
) )
loss_list = train_lenet(lenet, reader, adam) loss_list = train_lenet(lenet, reader, adam)
lenet.eval() lenet.eval()
......
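The training loop itself lives in the imported train_lenet helper, which this diff does not show; roughly, a single dygraph step under the rewritten API looks like the sketch below (train_one_batch is our name, the shapes follow the MNIST batches used above, and dynamic-graph mode is assumed).

import paddle


def train_one_batch(model, optimizer, x_data, y_data):
    # x_data: float32 ndarray [N, 1, 28, 28]; y_data: int64 ndarray [N, 1].
    # optimizer is built as paddle.optimizer.Adam(learning_rate=lr,
    # parameters=model.parameters()), matching the test above.
    img = paddle.to_tensor(x_data)            # replaces fluid.dygraph.to_variable
    label = paddle.to_tensor(y_data)
    out = model(img)
    acc = paddle.metric.accuracy(out, label)  # replaces paddle.static.accuracy
    loss = paddle.nn.functional.cross_entropy(out, label)
    avg_loss = paddle.mean(loss)
    avg_loss.backward()
    optimizer.step()                          # replaces optimizer.minimize(avg_loss)
    optimizer.clear_grad()
    return float(avg_loss), float(acc)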
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -12,29 +12,32 @@ ...@@ -12,29 +12,32 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np import tempfile
import random
import shutil
import time import time
import unittest import unittest
import copy
import logging
import tempfile
import paddle.nn as nn
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import *
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download
import numpy as np
from imperative_test_utils import ( from imperative_test_utils import (
fix_model_dict,
ImperativeLenet, ImperativeLenet,
ImperativeLinearBn, ImperativeLinearBn,
ImperativeLinearBn_hook,
) )
from imperative_test_utils import ImperativeLinearBn_hook
import paddle
import paddle.nn as nn
from paddle.dataset.common import download
from paddle.fluid.framework import _test_eager_guard
from paddle.quantization import (
AbsmaxQuantizer,
HistQuantizer,
ImperativePTQ,
KLQuantizer,
PerChannelAbsmaxQuantizer,
PTQConfig,
)
from paddle.static.log_helper import get_logger
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -149,8 +152,8 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -149,8 +152,8 @@ class TestImperativePTQ(unittest.TestCase):
label = paddle.to_tensor(y_data) label = paddle.to_tensor(y_data)
out = model(img) out = model(img)
acc_top1 = paddle.static.accuracy(input=out, label=label, k=1) acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.static.accuracy(input=out, label=label, k=5) acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
eval_acc_top1_list.append(float(acc_top1.numpy())) eval_acc_top1_list.append(float(acc_top1.numpy()))
if batch_id % 50 == 0: if batch_id % 50 == 0:
...@@ -207,7 +210,7 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -207,7 +210,7 @@ class TestImperativePTQ(unittest.TestCase):
break break
return top1_correct_num / total_num return top1_correct_num / total_num
def test_ptq(self): def func_ptq(self):
start_time = time.time() start_time = time.time()
self.set_vars() self.set_vars()
...@@ -265,9 +268,14 @@ class TestImperativePTQ(unittest.TestCase): ...@@ -265,9 +268,14 @@ class TestImperativePTQ(unittest.TestCase):
end_time = time.time() end_time = time.time()
print("total time: %ss \n" % (end_time - start_time)) print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQfuse(TestImperativePTQ): class TestImperativePTQfuse(TestImperativePTQ):
def test_ptq(self): def func_ptq(self):
start_time = time.time() start_time = time.time()
self.set_vars() self.set_vars()
...@@ -336,6 +344,11 @@ class TestImperativePTQfuse(TestImperativePTQ): ...@@ -336,6 +344,11 @@ class TestImperativePTQfuse(TestImperativePTQ):
end_time = time.time() end_time = time.time()
print("total time: %ss \n" % (end_time - start_time)) print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQHist(TestImperativePTQ): class TestImperativePTQHist(TestImperativePTQ):
def set_vars(self): def set_vars(self):
......
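A compressed view of the post-training-quantization flow these cases drive, assuming the PTQConfig signature (activation quantizer first, weight quantizer second) carried over unchanged from the old slim module; treat this as a sketch rather than the canonical API.

import paddle
from paddle.quantization import (
    ImperativePTQ,
    KLQuantizer,
    PerChannelAbsmaxQuantizer,
    PTQConfig,
)


def quantize_with_ptq(model):
    # Assumed argument order: PTQConfig(activation_quantizer, weight_quantizer).
    ptq = ImperativePTQ(PTQConfig(KLQuantizer(), PerChannelAbsmaxQuantizer()))
    # quantize() instruments the layers; calibration then happens by running
    # ordinary inference batches through the returned model.
    quant_model = ptq.quantize(model)
    return ptq, quant_model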
...@@ -12,34 +12,34 @@ ...@@ -12,34 +12,34 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import time
import tempfile import tempfile
import unittest import unittest
import logging
import numpy as np
from imperative_test_utils import ImperativeLenet, fix_model_dict
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import core from paddle.framework import core, set_flags
from paddle.fluid.optimizer import AdamOptimizer from paddle.nn import Conv2D, Conv2DTranspose
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.nn import Sequential
from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import ( from paddle.nn.quant.quant_layers import (
QuantizedConv2D, QuantizedConv2D,
QuantizedConv2DTranspose, QuantizedConv2DTranspose,
) )
from imperative_test_utils import fix_model_dict, ImperativeLenet from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -84,7 +84,7 @@ class TestImperativeQat(unittest.TestCase): ...@@ -84,7 +84,7 @@ class TestImperativeQat(unittest.TestCase):
) )
quant_conv1 = QuantizedConv2D(conv1) quant_conv1 = QuantizedConv2D(conv1)
data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
quant_conv1(fluid.dygraph.to_variable(data)) quant_conv1(paddle.to_tensor(data))
conv_transpose = Conv2DTranspose(4, 6, (3, 3)) conv_transpose = Conv2DTranspose(4, 6, (3, 3))
quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose) quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose)
...@@ -95,15 +95,13 @@ class TestImperativeQat(unittest.TestCase): ...@@ -95,15 +95,13 @@ class TestImperativeQat(unittest.TestCase):
seed = 1 seed = 1
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet) imperative_qat.quantize(lenet)
adam = AdamOptimizer( adam = Adam(learning_rate=0.001, parameters=lenet.parameters())
learning_rate=0.001, parameter_list=lenet.parameters()
)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True paddle.dataset.mnist.train(), batch_size=32, drop_last=True
...@@ -125,10 +123,10 @@ class TestImperativeQat(unittest.TestCase): ...@@ -125,10 +123,10 @@ class TestImperativeQat(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -157,14 +155,14 @@ class TestImperativeQat(unittest.TestCase): ...@@ -157,14 +155,14 @@ class TestImperativeQat(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc_top1 = paddle.static.accuracy( acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1 input=out, label=label, k=1
) )
acc_top5 = paddle.static.accuracy( acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5 input=out, label=label, k=5
) )
...@@ -197,11 +195,11 @@ class TestImperativeQat(unittest.TestCase): ...@@ -197,11 +195,11 @@ class TestImperativeQat(unittest.TestCase):
y_data = ( y_data = (
np.array([x[1] for x in data]).astype('int64').reshape(-1, 1) np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
) )
test_img = fluid.dygraph.to_variable(test_data) test_img = paddle.to_tensor(test_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
lenet.eval() lenet.eval()
fp32_out = lenet(test_img) fp32_out = lenet(test_img)
fp32_acc = paddle.static.accuracy(fp32_out, label).numpy() fp32_acc = paddle.metric.accuracy(fp32_out, label).numpy()
with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir: with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir:
# save inference quantized model # save inference quantized model
...@@ -220,13 +218,13 @@ class TestImperativeQat(unittest.TestCase): ...@@ -220,13 +218,13 @@ class TestImperativeQat(unittest.TestCase):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
else: else:
place = core.CPUPlace() place = core.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
dirname=tmpdir, tmpdir,
executor=exe, executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX, model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX,
...@@ -237,8 +235,8 @@ class TestImperativeQat(unittest.TestCase): ...@@ -237,8 +235,8 @@ class TestImperativeQat(unittest.TestCase):
fetch_list=fetch_targets, fetch_list=fetch_targets,
) )
paddle.disable_static() paddle.disable_static()
quant_out = fluid.dygraph.to_variable(quant_out) quant_out = paddle.to_tensor(quant_out)
quant_acc = paddle.static.accuracy(quant_out, label).numpy() quant_acc = paddle.metric.accuracy(quant_out, label).numpy()
paddle.enable_static() paddle.enable_static()
delta_value = fp32_acc - quant_acc delta_value = fp32_acc - quant_acc
self.assertLessEqual(delta_value, self.diff_threshold) self.assertLessEqual(delta_value, self.diff_threshold)
......
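End to end, the QAT test above quantizes, trains, exports, and reloads; a skeletal version of that flow is sketched below. The save_quantized_model keyword names (layer/path/input_spec) and the quantize-type strings are assumed from the dygraph QAT documentation rather than shown in this diff.

import os

import paddle
from paddle.quantization import ImperativeQuantAware


def qat_and_export(model, save_dir):
    qat = ImperativeQuantAware(
        weight_quantize_type='abs_max',
        activation_quantize_type='moving_average_abs_max',
    )
    qat.quantize(model)  # inserts fake-quant layers in place

    # ... train and evaluate the instrumented model here ...

    # Export an inference model; this produces lenet.pdmodel / lenet.pdiparams,
    # matching the INFER_MODEL_SUFFIX / INFER_PARAMS_SUFFIX constants above.
    qat.save_quantized_model(
        layer=model,
        path=os.path.join(save_dir, "lenet"),
        input_spec=[
            paddle.static.InputSpec(shape=[None, 1, 28, 28], dtype='float32')
        ],
    )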
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -12,25 +12,25 @@ ...@@ -12,25 +12,25 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np import tempfile
import random
import shutil
import time import time
import unittest import unittest
import logging
import tempfile import numpy as np
from imperative_test_utils import ImperativeLenet
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download from paddle.dataset.common import download
from imperative_test_utils import fix_model_dict, ImperativeLenet from paddle.framework import set_flags
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if paddle.is_compiled_with_cuda(): if paddle.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
...@@ -117,7 +117,7 @@ class TestImperativeQatAmp(unittest.TestCase): ...@@ -117,7 +117,7 @@ class TestImperativeQatAmp(unittest.TestCase):
if use_amp: if use_amp:
with paddle.amp.auto_cast(): with paddle.amp.auto_cast():
out = model(img) out = model(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -129,7 +129,7 @@ class TestImperativeQatAmp(unittest.TestCase): ...@@ -129,7 +129,7 @@ class TestImperativeQatAmp(unittest.TestCase):
adam.clear_gradients() adam.clear_gradients()
else: else:
out = model(img) out = model(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -170,8 +170,8 @@ class TestImperativeQatAmp(unittest.TestCase): ...@@ -170,8 +170,8 @@ class TestImperativeQatAmp(unittest.TestCase):
with paddle.amp.auto_cast(use_amp): with paddle.amp.auto_cast(use_amp):
out = model(img) out = model(img)
acc_top1 = paddle.static.accuracy(input=out, label=label, k=1) acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.static.accuracy(input=out, label=label, k=5) acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
acc_top1_list.append(float(acc_top1.numpy())) acc_top1_list.append(float(acc_top1.numpy()))
if batch_id % 100 == 0: if batch_id % 100 == 0:
......
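The AMP variant only wraps the forward pass in autocast; everything after the context manager runs in float32 as usual. A minimal sketch of that toggle (amp_forward is an illustrative name):

import paddle


def amp_forward(model, img, label, use_amp=True):
    # paddle.amp.auto_cast(enable) runs the enclosed ops in float16 where safe
    # when enable is True, and is a no-op when it is False.
    with paddle.amp.auto_cast(use_amp):
        out = model(img)
    acc = paddle.metric.accuracy(input=out, label=label, k=1)
    return out, acc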
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -13,27 +13,18 @@ ...@@ -13,27 +13,18 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.log_helper import get_logger
from test_imperative_qat import TestImperativeQat from test_imperative_qat import TestImperativeQat
import paddle
from paddle.framework import core, set_flags
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class TestImperativeQatChannelWise(TestImperativeQat): class TestImperativeQatChannelWise(TestImperativeQat):
......
...@@ -13,27 +13,18 @@ ...@@ -13,27 +13,18 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.log_helper import get_logger
from test_imperative_qat import TestImperativeQat from test_imperative_qat import TestImperativeQat
import paddle
from paddle.framework import core, set_flags
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class TestImperativeQatfuseBN(TestImperativeQat): class TestImperativeQatfuseBN(TestImperativeQat):
......
...@@ -12,57 +12,53 @@ ...@@ -12,57 +12,53 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import time
import tempfile
import unittest import unittest
import logging
import numpy as np
from imperative_test_utils import fix_model_dict
import paddle import paddle
import paddle.fluid as fluid from paddle.framework import core, set_flags
from paddle.fluid import core from paddle.nn import (
from paddle.fluid.optimizer import ( BatchNorm2D,
SGDOptimizer, Conv2D,
AdamOptimizer, LeakyReLU,
MomentumOptimizer, Linear,
MaxPool2D,
PReLU,
ReLU,
Sequential,
Sigmoid,
Softmax,
) )
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.quantization import ImperativeQuantAware
from paddle.nn import Sequential from paddle.static.log_helper import get_logger
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import (
QuantizedConv2D,
QuantizedConv2DTranspose,
)
from imperative_test_utils import fix_model_dict
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -116,7 +112,7 @@ class ImperativeLenet(fluid.dygraph.Layer): ...@@ -116,7 +112,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
def forward(self, inputs): def forward(self, inputs):
x = self.features(inputs) x = self.features(inputs)
x = paddle.flatten(x, 1, -1) x = paddle.flatten(x, 1)
x = self.fc(x) x = self.fc(x)
return x return x
...@@ -139,14 +135,14 @@ class TestImperativeQatLSQ(unittest.TestCase): ...@@ -139,14 +135,14 @@ class TestImperativeQatLSQ(unittest.TestCase):
seed = 100 seed = 100
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
paddle.disable_static() paddle.disable_static()
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet) imperative_qat.quantize(lenet)
optimizer = MomentumOptimizer( optimizer = paddle.optimizer.Momentum(
learning_rate=0.1, parameter_list=lenet.parameters(), momentum=0.9 learning_rate=0.1, parameters=lenet.parameters(), momentum=0.9
) )
train_reader = paddle.batch( train_reader = paddle.batch(
...@@ -166,10 +162,10 @@ class TestImperativeQatLSQ(unittest.TestCase): ...@@ -166,10 +162,10 @@ class TestImperativeQatLSQ(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
...@@ -199,14 +195,14 @@ class TestImperativeQatLSQ(unittest.TestCase): ...@@ -199,14 +195,14 @@ class TestImperativeQatLSQ(unittest.TestCase):
.astype('int64') .astype('int64')
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc_top1 = paddle.static.accuracy( acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1 input=out, label=label, k=1
) )
acc_top5 = paddle.static.accuracy( acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5 input=out, label=label, k=5
) )
......
...@@ -12,57 +12,55 @@ ...@@ -12,57 +12,55 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import time
import tempfile
import unittest import unittest
import logging
import numpy as np
from imperative_test_utils import fix_model_dict
import paddle import paddle
import paddle.fluid as fluid from paddle.framework import core, set_flags
from paddle.fluid import core from paddle.nn import (
from paddle.fluid.optimizer import ( BatchNorm2D,
SGDOptimizer, Conv2D,
AdamOptimizer, LeakyReLU,
MomentumOptimizer, Linear,
MaxPool2D,
PReLU,
ReLU,
Sequential,
Sigmoid,
Softmax,
) )
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware from paddle.nn.quant.quant_layers import QuantizedMatmul
from paddle.nn import Sequential from paddle.optimizer import Momentum
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU from paddle.quantization import ImperativeQuantAware
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D from paddle.static.log_helper import get_logger
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import (
QuantizedConv2D,
QuantizedMatmul,
)
from imperative_test_utils import fix_model_dict
paddle.enable_static() paddle.enable_static()
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger( _logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s' __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
) )
class ImperativeLenet(fluid.dygraph.Layer): class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1") fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2") fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3") fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1") fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2") fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3") fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential( self.features = Sequential(
Conv2D( Conv2D(
in_channels=1, in_channels=1,
...@@ -140,15 +138,15 @@ class TestImperativeQatMatmul(unittest.TestCase): ...@@ -140,15 +138,15 @@ class TestImperativeQatMatmul(unittest.TestCase):
seed = 100 seed = 100
np.random.seed(seed) np.random.seed(seed)
fluid.default_main_program().random_seed = seed paddle.static.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed paddle.static.default_startup_program().random_seed = seed
paddle.disable_static() paddle.disable_static()
lenet = ImperativeLenet() lenet = ImperativeLenet()
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet) imperative_qat.quantize(lenet)
optimizer = MomentumOptimizer( optimizer = Momentum(
learning_rate=0.1, parameter_list=lenet.parameters(), momentum=0.9 learning_rate=0.1, parameters=lenet.parameters(), momentum=0.9
) )
train_reader = paddle.batch( train_reader = paddle.batch(
...@@ -168,18 +166,18 @@ class TestImperativeQatMatmul(unittest.TestCase): ...@@ -168,18 +166,18 @@ class TestImperativeQatMatmul(unittest.TestCase):
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc = paddle.static.accuracy(out, label) acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy( loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False out, label, reduction='none', use_softmax=False
) )
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
avg_loss.backward() avg_loss.backward()
optimizer.minimize(avg_loss) optimizer.step()
lenet.clear_gradients() optimizer.clear_grad()
if batch_id % 100 == 0: if batch_id % 100 == 0:
_logger.info( _logger.info(
...@@ -201,14 +199,14 @@ class TestImperativeQatMatmul(unittest.TestCase): ...@@ -201,14 +199,14 @@ class TestImperativeQatMatmul(unittest.TestCase):
.astype('int64') .astype('int64')
.reshape(-1, 1) .reshape(-1, 1)
) )
img = fluid.dygraph.to_variable(x_data) img = paddle.to_tensor(x_data)
label = fluid.dygraph.to_variable(y_data) label = paddle.to_tensor(y_data)
out = lenet(img) out = lenet(img)
acc_top1 = paddle.static.accuracy( acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1 input=out, label=label, k=1
) )
acc_top5 = paddle.static.accuracy( acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5 input=out, label=label, k=5
) )
......
...@@ -12,20 +12,19 @@ ...@@ -12,20 +12,19 @@
# see the license for the specific language governing permissions and # see the license for the specific language governing permissions and
# limitations under the license. # limitations under the license.
import logging
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import numpy as np
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
from paddle.optimizer import Adam from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.nn import Sequential from paddle.nn import Sequential
from paddle.nn import Linear from paddle.optimizer import Adam
from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose from paddle.quantization import ImperativeQuantAware
from paddle.fluid.log_helper import get_logger from paddle.static.log_helper import get_logger
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
...@@ -110,7 +109,7 @@ class ModelForConv2dT(nn.Layer): ...@@ -110,7 +109,7 @@ class ModelForConv2dT(nn.Layer):
def __init__(self, num_classes=10): def __init__(self, num_classes=10):
super().__init__() super().__init__()
self.features = nn.Conv2DTranspose(4, 6, (3, 3)) self.features = nn.Conv2DTranspose(4, 6, (3, 3))
self.fc = Linear(600, num_classes) self.fc = nn.Linear(in_features=600, out_features=num_classes)
def forward(self, inputs): def forward(self, inputs):
x = self.features(inputs) x = self.features(inputs)
...@@ -123,28 +122,28 @@ class ImperativeLenet(paddle.nn.Layer): ...@@ -123,28 +122,28 @@ class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10, classifier_activation='softmax'): def __init__(self, num_classes=10, classifier_activation='softmax'):
super().__init__() super().__init__()
self.features = Sequential( self.features = Sequential(
paddle.nn.Conv2D( nn.Conv2D(
in_channels=1, in_channels=1,
out_channels=6, out_channels=6,
kernel_size=3, kernel_size=3,
stride=1, stride=1,
padding=1, padding=1,
), ),
paddle.nn.MaxPool2D(kernel_size=2, stride=2), nn.MaxPool2D(kernel_size=2, stride=2),
paddle.nn.Conv2D( nn.Conv2D(
in_channels=6, in_channels=6,
out_channels=16, out_channels=16,
kernel_size=5, kernel_size=5,
stride=1, stride=1,
padding=0, padding=0,
), ),
paddle.nn.MaxPool2D(kernel_size=2, stride=2), nn.MaxPool2D(kernel_size=2, stride=2),
) )
self.fc = Sequential( self.fc = Sequential(
Linear(400, 120), nn.Linear(in_features=400, out_features=120),
Linear(120, 84), nn.Linear(in_features=120, out_features=84),
Linear(84, num_classes), nn.Linear(in_features=84, out_features=num_classes),
) )
def forward(self, inputs): def forward(self, inputs):
...@@ -160,7 +159,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -160,7 +159,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
_logger.info("test act_preprocess") _logger.info("test act_preprocess")
self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT) self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT)
def test_quant_aware_training(self): def func_quant_aware_training(self):
imperative_qat = self.imperative_qat imperative_qat = self.imperative_qat
seed = 1 seed = 1
np.random.seed(seed) np.random.seed(seed)
...@@ -170,8 +169,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -170,8 +169,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
fixed_state = {} fixed_state = {}
param_init_map = {} param_init_map = {}
for name, param in lenet.named_parameters(): for name, param in lenet.named_parameters():
p_shape = param.numpy().shape p_shape = np.array(param).shape
p_value = param.numpy() p_value = np.array(param)
if name.endswith("bias"): if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32') value = np.zeros_like(p_value).astype('float32')
else: else:
...@@ -217,8 +216,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -217,8 +216,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
loss = nn.functional.loss.cross_entropy(out, label) loss = nn.functional.loss.cross_entropy(out, label)
avg_loss = paddle.mean(loss) avg_loss = paddle.mean(loss)
avg_loss.backward() avg_loss.backward()
adam.minimize(avg_loss) adam.step()
model.clear_gradients() adam.clear_grad()
if batch_id % 50 == 0: if batch_id % 50 == 0:
_logger.info( _logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".format( "Train | At epoch {} step {}: loss = {:}, acc= {:}".format(
...@@ -262,6 +261,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase): ...@@ -262,6 +261,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
train(lenet) train(lenet)
test(lenet) test(lenet)
def test_quant_aware_training(self):
with _test_eager_guard():
self.func_quant_aware_training()
self.func_quant_aware_training()
class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess): class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess):
def setUp(self): def setUp(self):
......
# copyright (c) 2018 paddlepaddle authors. all rights reserved. # copyright (c) 2022 paddlepaddle authors. all rights reserved.
# #
# licensed under the apache license, version 2.0 (the "license"); # licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license. # you may not use this file except in compliance with the license.
...@@ -13,34 +13,25 @@ ...@@ -13,34 +13,25 @@
# limitations under the license. # limitations under the license.
import os import os
import numpy as np
import random
import unittest import unittest
import logging
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm
from paddle.fluid.log_helper import get_logger
import numpy as np
from imperative_test_utils import ( from imperative_test_utils import (
ImperativeLenetWithSkipQuant,
fix_model_dict, fix_model_dict,
train_lenet, train_lenet,
ImperativeLenetWithSkipQuant,
) )
import paddle
from paddle.framework import core, set_flags
from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
os.environ["CPU_NUM"] = "1" os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda(): if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True}) set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class TestImperativeOutSclae(unittest.TestCase): class TestImperativeOutSclae(unittest.TestCase):
...@@ -60,9 +51,7 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -60,9 +51,7 @@ class TestImperativeOutSclae(unittest.TestCase):
lenet = fix_model_dict(lenet) lenet = fix_model_dict(lenet)
qat.quantize(lenet) qat.quantize(lenet)
adam = AdamOptimizer( adam = Adam(learning_rate=lr, parameters=lenet.parameters())
learning_rate=lr, parameter_list=lenet.parameters()
)
dynamic_loss_rec = [] dynamic_loss_rec = []
lenet.train() lenet.train()
loss_list = train_lenet(lenet, reader, adam) loss_list = train_lenet(lenet, reader, adam)
...@@ -88,14 +77,14 @@ class TestImperativeOutSclae(unittest.TestCase): ...@@ -88,14 +77,14 @@ class TestImperativeOutSclae(unittest.TestCase):
place = core.CUDAPlace(0) place = core.CUDAPlace(0)
else: else:
place = core.CPUPlace() place = core.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
[ [
inference_program, inference_program,
feed_target_names, feed_target_names,
fetch_targets, fetch_targets,
] = fluid.io.load_inference_model( ] = paddle.static.load_inference_model(
dirname=save_dir, save_dir,
executor=exe, executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX, model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX,
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
import unittest import unittest
import numpy as np import numpy as np
import paddle import paddle
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
import paddle.nn.quant.quant_layers as quant_layers import paddle.nn.quant.quant_layers as quant_layers
from paddle.framework import core
paddle.enable_static() paddle.enable_static()
...@@ -38,23 +38,23 @@ def init_data(batch_size=32, img_shape=[784], label_range=9): ...@@ -38,23 +38,23 @@ def init_data(batch_size=32, img_shape=[784], label_range=9):
class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
def check_backward(self, use_cuda): def check_backward(self, use_cuda):
main_program = fluid.Program() main_program = paddle.static.Program()
startup_program = fluid.Program() startup_program = paddle.static.Program()
with fluid.program_guard(main_program, startup_program): with paddle.static.program_guard(main_program, startup_program):
image = fluid.layers.data( image = paddle.static.data(
name='image', shape=[784], dtype='float32' name='image', shape=[-1, 784], dtype='float32'
) )
label = fluid.layers.data(name='label', shape=[1], dtype='int64') label = paddle.static.data(
fc_tmp = fluid.layers.fc(image, size=10, act='softmax') name='label', shape=[-1, 1], dtype='int64'
)
fc_tmp = paddle.static.nn.fc(image, size=10, activation='softmax')
out_scale = quant_layers.MovingAverageAbsMaxScale( out_scale = quant_layers.MovingAverageAbsMaxScale(
name=fc_tmp.name, dtype=fc_tmp.dtype name=fc_tmp.name, dtype=fc_tmp.dtype
) )
fc_tmp_1 = out_scale(fc_tmp) fc_tmp_1 = out_scale(fc_tmp)
cross_entropy = paddle.nn.functional.softmax_with_cross_entropy( cross_entropy = paddle.nn.functional.cross_entropy(fc_tmp, label)
fc_tmp, label
)
loss = paddle.mean(cross_entropy) loss = paddle.mean(cross_entropy)
sgd = fluid.optimizer.SGD(learning_rate=1e-3) sgd = paddle.optimizer.SGD(learning_rate=1e-3)
sgd.minimize(loss) sgd.minimize(loss)
moving_average_abs_max_scale_ops = [ moving_average_abs_max_scale_ops = [
...@@ -66,13 +66,13 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase): ...@@ -66,13 +66,13 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
len(moving_average_abs_max_scale_ops) == 1 len(moving_average_abs_max_scale_ops) == 1
), "The number of moving_average_abs_max_scale_ops should be 1." ), "The number of moving_average_abs_max_scale_ops should be 1."
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = fluid.Executor(place) exe = paddle.static.Executor(place)
exe.run(startup_program) exe.run(startup_program)
binary = fluid.compiler.CompiledProgram( binary = paddle.static.CompiledProgram(main_program).with_data_parallel(
main_program loss_name=loss.name
).with_data_parallel(loss_name=loss.name) )
img, label = init_data() img, label = init_data()
feed_dict = {"image": img, "label": label} feed_dict = {"image": img, "label": label}
......
This diff has been collapsed.
This diff has been collapsed.
...@@ -486,7 +486,7 @@ def get_filenames(full_test=False): ...@@ -486,7 +486,7 @@ def get_filenames(full_test=False):
''' '''
global whl_error global whl_error
import paddle # noqa: F401 import paddle # noqa: F401
import paddle.fluid.contrib.slim.quantization # noqa: F401 import paddle.static.quantization # noqa: F401
whl_error = [] whl_error = []
if full_test: if full_test:
......
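For reference, a minimal sketch of the relocated dygraph QAT entry point that the hunks above switch to (from paddle.quantization rather than paddle.fluid.contrib.slim.quantization). The model below is a hypothetical placeholder, not code from this PR; any paddle.nn.Layer would work the same way:

import paddle
from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware  # new import path used in the hunks above

# Hypothetical toy model standing in for the LeNet used by the tests.
model = paddle.nn.Sequential(
    paddle.nn.Conv2D(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=1),
    paddle.nn.Flatten(),
    paddle.nn.Linear(in_features=6 * 28 * 28, out_features=10),
)

# Insert fake-quant layers in place, mirroring qat.quantize(lenet) in the tests.
qat = ImperativeQuantAware()
qat.quantize(model)

# Train with the 2.x optimizer API (adam.step() / adam.clear_grad()), as the diff does.
adam = Adam(learning_rate=1e-3, parameters=model.parameters())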