Unverified commit 72973d5a, authored by zhouzj, committed by GitHub

[clean fluid api] Move fluid/contrib/slim and remove fluid api. (#48717)

Parent a186e60d
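For downstream code, the practical effect of this change is an import-path migration: the static-graph quantization passes move from paddle.fluid.contrib.slim.quantization to paddle.static.quantization, and the imperative QAT/PTQ tooling is exposed under paddle.quantization. A minimal before/after sketch, using symbols that appear in the hunks below (whether every symbol is re-exported exactly like this is an assumption):

# Old import paths (removed by this PR):
#   from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
#   from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
# New import paths:
from paddle.static.quantization import (
    PostTrainingQuantization,
    QuantizationTransformPass,
    QuantWeightPass,
)
from paddle.quantization import ImperativeQuantAware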
......@@ -119,7 +119,7 @@ if(WITH_TESTING)
add_subdirectory(paddle/tests)
add_subdirectory(paddle/fluid/tests)
add_subdirectory(paddle/fluid/contrib/tests)
add_subdirectory(paddle/fluid/contrib/slim/tests)
add_subdirectory(paddle/static/quantization/tests)
endif()
if(NOT WITH_SETUP_INSTALL)
......
......@@ -1617,9 +1617,7 @@ class Engine:
fetch_vars = self._fetch_vars["predict"]['outputs']
dist_main_prog = self._dist_main_progs["predict"][self._cur_rank]
if self._strategy.qat.enable and self._strategy.qat.onnx_format:
from paddle.fluid.contrib.slim.quantization import (
QuantWeightPass,
)
from paddle.static.quantization import QuantWeightPass
self._logger.info("export quantized model.")
self._logger.info(
......
......@@ -18,14 +18,14 @@ import numpy as np
import paddle
from paddle.fluid import core, framework
from paddle.fluid.contrib.slim.quantization import (
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.static.quantization import (
AddQuantDequantForInferencePass,
AddQuantDequantPassV2,
OutScaleForTrainingPass,
QuantizationTransformPassV2,
utils,
)
from paddle.fluid.dygraph.parallel import ParallelEnv
from ..auto_parallel.converter import Converter
from ..auto_parallel.dist_attribute import (
......
......@@ -18,9 +18,6 @@ from . import memory_usage_calc
from .memory_usage_calc import *
from . import op_frequence
from .op_frequence import *
from . import quantize
from .quantize import *
from . import slim
from . import extend_optimizer
from .extend_optimizer import *
from . import model_stat
......@@ -36,7 +33,6 @@ __all__ = []
__all__ += memory_usage_calc.__all__
__all__ += op_frequence.__all__
__all__ += quantize.__all__
__all__ += extend_optimizer.__all__
__all__ += ['mixed_precision']
__all__ += layers.__all__
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import quantization_pass
from .quantization_pass import *
from . import quant_int8_mkldnn_pass
from .quant_int8_mkldnn_pass import *
from . import quant2_int8_mkldnn_pass
from .quant2_int8_mkldnn_pass import *
from . import post_training_quantization
from .post_training_quantization import *
from . import imperative
from .imperative import *
__all__ = []
__all__ += quantization_pass.__all__
__all__ += quant_int8_mkldnn_pass.__all__
__all__ += quant2_int8_mkldnn_pass.__all__
__all__ += post_training_quantization.__all__
__all__ += imperative.__all__
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import numpy as np
from .... import core
from ....framework import Program, Operator, Variable, program_guard
from ....executor import global_scope
from .... import unique_name
from ....layer_helper import LayerHelper
from ....param_attr import ParamAttr
from ....initializer import Constant
from ....log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
def find_next_ops(block, var_name):
"""
Find all ops that take the given variable as an input.
"""
res_ops = []
for op in block.ops:
if var_name in op.input_arg_names:
res_ops.append(op)
return res_ops
def load_variable_data(scope, var_name):
'''
Load variable value from scope
'''
var_node = scope.find_var(var_name)
assert var_node is not None, "Cannot find " + var_name + " in scope."
return np.array(var_node.get_tensor())
class QuantizeTranspilerV2:
def __init__(
self,
weight_bits=8,
activation_bits=8,
weight_quantize_type='abs_max',
activation_quantize_type='moving_average_abs_max',
quantizable_op_type=[
'conv2d',
'depthwise_conv2d',
'mul',
],
skip_pattern=['skip_quant'],
):
"""
Apply fake quant for the quantized ops.
Args:
weight_bits(int): the bit width used to quantize weights.
activation_bits(int): the bit width used to quantize activations.
weight_quantize_type(str): the quantization type for weights.
Only 'abs_max' and 'channel_wise_abs_max' are supported.
activation_quantize_type(str): the quantization type for activations.
Only 'abs_max' and 'moving_average_abs_max' are supported.
quantizable_op_type(list[str]): the op types to be quantized.
skip_pattern(str|list): The user-defined quantization skip pattern, which
will be presented in the name scope of an op. When the skip pattern is
detected in an op's name scope, the corresponding op will not be quantized.
"""
self._weight_bits = weight_bits
self._activation_bits = activation_bits
assert activation_quantize_type in [
"abs_max",
"moving_average_abs_max",
], (
"activation_quantize_type should be abs_max "
"or moving_average_abs_max for now."
)
assert weight_quantize_type in [
"abs_max",
"channel_wise_abs_max",
], "weight_quantize_type should be abs_max or channel_wise_abs_max."
self._activation_quantize_type = activation_quantize_type
self._weight_quantize_type = weight_quantize_type
for op_type in quantizable_op_type:
assert op_type in [
'conv2d',
'depthwise_conv2d',
'mul',
], "Quantize op should be ['conv2d', 'depthwise_conv2d', 'mul']"
self._quantizable_ops = quantizable_op_type
self._quantizable_grad_ops = [
'%s_grad' % (op) for op in self._quantizable_ops
]
self._skip_pattern = skip_pattern
self._helper = LayerHelper(self.__class__.__name__)
self._moving_rate = 0.9
self._out_ch_axis1_ops = ['conv2d_transpose', 'mul', 'matmul']
def apply(self, program, startup_program, is_test=False):
"""
Apply quantization to fluid Program.
Args:
program(Program): the train or test program to be quantized.
startup_program(Program): the corresponding startup_program.
is_test(bool): Whether the program is used for testing.
Returns:
None
"""
assert isinstance(
program, Program
), "program must be the instance of Program"
assert isinstance(
startup_program, Program
), "startup_program must be the instance of Program"
var_rename_map = [
collections.OrderedDict() for _ in range(len(program.blocks))
]
with program_guard(program, startup_program):
for block in program.blocks:
ops = list(block.ops)
for op in ops:
if op.type in self._quantizable_ops and (
not self._is_skip_quant(op)
):
self._transform_forward(
block, op, var_rename_map, is_test
)
for block in program.blocks:
ops = list(block.ops)
for op in ops:
if op.type in self._quantizable_grad_ops and (
not self._is_skip_quant(op)
):
self._transform_backward(block, op, var_rename_map)
def convert(self, test_program, scope=None):
"""
Convert the test program.
Get the out scale from the moving_average_abs_max_scale op and save the
out scale into the quantized op.
Args:
test_program(Program): the test program to be converted.
scope(fluid.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by global_scope().
"""
scope = global_scope() if scope is None else scope
for block in test_program.blocks:
for op in block.ops:
if (
op.has_attr("quantization_type")
and op.attr("quantization_type") == "qat_with_weight"
):
# quant op -> var1 -> fake op -> var2
assert len(op.output_arg_names) == 1
var1_name = op.output_arg_names[0]
fake_ops = find_next_ops(block, var1_name)
assert len(fake_ops) == 1
fake_op = fake_ops[0]
assert fake_op.type == "moving_average_abs_max_scale"
out_scale_name = fake_op.output("OutScale")
out_threshold = load_variable_data(scope, out_scale_name[0])
op._set_attr("out_threshold", float(out_threshold))
var2_name = fake_op.output("Out")[0]
op._rename_output(var1_name, var2_name)
fake_op._rename_output(var2_name, var1_name)
def _transform_forward(self, block, op, var_rename_map, is_test):
"""
Insert fake quant op before the target ops.
"""
op._set_attr("quantization_type", "qat_with_weight")
# insert fake quant op before the quantized op
for in_name in op.input_arg_names:
block_id = block.idx
idx = block.ops.index(op)
if in_name in var_rename_map[block_id]:
new_in_name = var_rename_map[block_id][in_name]
else:
in_var = block.var(in_name)
target_dtype = [
core.VarDesc.VarType.FP32,
core.VarDesc.VarType.FP16,
]
if in_var.dtype not in target_dtype:
continue
quant_bits = (
self._weight_bits
if in_var.persistable
else self._activation_bits
)
quant_type = (
self._weight_quantize_type
if in_var.persistable
else self._activation_quantize_type
)
if quant_type == "abs_max":
new_var = self._insert_abs_max_fq_op(
block, idx, in_var, quant_bits
)
elif quant_type == "moving_average_abs_max":
new_var = self._insert_ma_abs_max_fq_op(
block, idx, in_var, quant_bits, is_test
)
elif quant_type == "channel_wise_abs_max":
ch_axis = 1 if op.type in self._out_ch_axis1_ops else 0
new_var = self._insert_pc_abs_max_fq_op(
block, idx, in_var, quant_bits, ch_axis
)
else:
_logger.error(
"Don't support the quant_type: %s" % quant_type
)
continue
new_in_name = new_var.name
var_rename_map[block_id][in_name] = new_in_name
op._rename_input(in_name, new_in_name)
# insert out scale op followed the quantized op
for out_name in op.output_arg_names:
next_ops = find_next_ops(block, out_name)
idx = block.ops.index(op)
out_var = block.var(out_name)
new_out_var = self._insert_ma_abs_max_scale_op(
block, idx + 1, out_var, is_test, True
)
for next_op in next_ops:
if "_grad" not in next_op.type:
next_op._rename_input(out_name, new_out_var.name)
def _is_skip_quant(self, op):
"""
Analyse whether the op should skip quantization or not.
"""
user_skipped = False
if isinstance(self._skip_pattern, list):
user_skipped = op.has_attr("op_namescope") and any(
pattern in op.attr("op_namescope")
for pattern in self._skip_pattern
)
elif isinstance(self._skip_pattern, str):
user_skipped = (
op.has_attr("op_namescope")
and op.attr("op_namescope").find(self._skip_pattern) != -1
)
return user_skipped
def _transform_backward(self, block, op, var_rename_map):
"""
Update the backward ops of the target ops.
Note: for the grad ops, only the inputs are renamed; the outputs are left unchanged.
"""
block_id = block.idx
no_dequanted_input_vars = True
for name in op.input_arg_names:
if name in var_rename_map[block_id]:
new_var_name = var_rename_map[block_id][name]
op._rename_input(name, new_var_name)
no_dequanted_input_vars = False
if no_dequanted_input_vars:
raise ValueError(
"There is no dequanted inputs for op %s." % (op.type)
)
def _insert_abs_max_fq_op(self, block, idx, in_var, quant_bits):
"""
Insert abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
inputs = {'X': in_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
attrs = {'bit_length': quant_bits}
block._insert_op(
idx,
type='fake_quantize_dequantize_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_ma_abs_max_fq_op(self, block, idx, in_var, quant_bits, is_test):
"""
Insert moving average abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
if not is_test:
state_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.state".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
state_var.stop_gradient = True
accum_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.accum".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
accum_var.stop_gradient = True
attrs = {
'moving_rate': self._moving_rate,
'bit_length': quant_bits,
'is_test': is_test,
}
inputs = {'X': in_var, 'InScale': scale_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
if not is_test:
inputs['InState'] = state_var
inputs['InAccum'] = accum_var
outputs['OutState'] = state_var
outputs['OutAccum'] = accum_var
block._insert_op(
idx,
type='fake_quantize_dequantize_moving_average_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_pc_abs_max_fq_op(self, block, idx, in_var, quant_bits, ch_axis):
"""
Insert per channel abs max fake quant op.
"""
quant_dequant_var = block.create_var(
type=in_var.type,
name="{}.quant_dequant".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.quant_dequant.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[in_var.shape[ch_axis]],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
inputs = {'X': in_var}
outputs = {'Out': quant_dequant_var, 'OutScale': scale_var}
attrs = {'bit_length': quant_bits, 'quant_axis': ch_axis}
block._insert_op(
idx,
type='fake_channel_wise_quantize_dequantize_abs_max',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
return quant_dequant_var
def _insert_ma_abs_max_scale_op(
self, block, idx, in_var, is_test, has_out_var=False
):
"""
Insert moving average abs max scale op.
"""
scale_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.scale".format(in_var.name),
initializer=Constant(0.0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
scale_var.stop_gradient = True
attrs = {'moving_rate': self._moving_rate, 'is_test': is_test}
inputs = {'X': in_var}
outputs = {'OutScale': scale_var}
if not is_test:
state_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.state".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
state_var.stop_gradient = True
accum_var = self._helper.create_parameter(
attr=ParamAttr(
name="{}.outscale.accum".format(in_var.name),
initializer=Constant(0),
trainable=False,
),
shape=[1],
dtype=in_var.dtype,
)
accum_var.stop_gradient = True
inputs['InState'] = state_var
inputs['InAccum'] = accum_var
outputs['OutState'] = state_var
outputs['OutAccum'] = accum_var
if has_out_var:
out_var = block.create_var(
type=in_var.type,
name="{}.tmp".format(in_var.name),
shape=in_var.shape,
dtype=in_var.dtype,
)
outputs['Out'] = out_var
block._insert_op(
idx,
type='moving_average_abs_max_scale',
attrs=attrs,
inputs=inputs,
outputs=outputs,
)
if has_out_var:
return out_var
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization.quantize_transpiler_v2 import (
QuantizeTranspilerV2,
)
from paddle.fluid import core
paddle.enable_static()
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
def conv_net(img, label):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
pool_type='max',
act="relu",
)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
pool_type='avg',
act="relu",
)
with fluid.name_scope("skip_quant"):
hidden = fluid.layers.fc(input=conv_pool_1, size=100, act='relu')
prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss)
return avg_loss
class TestQuantizeProgramPass(unittest.TestCase):
def quantize_program(
self,
use_cuda,
seed,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=False,
):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32'
)
label = fluid.layers.data(
name='label', shape=[1], dtype='int64'
)
loss = conv_net(img, label)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.0001)
opt.minimize(loss)
return [img, label], loss
random.seed(0)
np.random.seed(0)
# 1 Define program
train_program = fluid.Program()
startup_program = fluid.Program()
test_program = fluid.Program()
feeds, loss = build_program(train_program, startup_program, False)
build_program(test_program, startup_program, True)
test_program = test_program.clone(for_test=True)
if not for_ci:
train_graph = IrGraph(
core.Graph(train_program.desc), for_test=False
)
train_graph.draw('.', 'train_program_1')
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
test_graph.draw('.', 'test_program_1')
# 2 Apply quantization
qt = QuantizeTranspilerV2(
activation_quantize_type=activation_quant_type,
weight_quantize_type=weight_quant_type,
)
qt.apply(train_program, startup_program, is_test=False)
qt.apply(test_program, startup_program, is_test=True)
# 3 Train
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.Scope()
with fluid.scope_guard(scope):
exe.run(startup_program)
if not for_ci:
train_graph = IrGraph(
core.Graph(train_program.desc), for_test=False
)
train_graph.draw('.', 'train_program_2')
test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)
test_graph.draw('.', 'test_program_2')
build_strategy = fluid.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(train_program).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy
)
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size
)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.scope_guard(scope):
for idx in range(iters):
data = next(train_reader())
loss_v = exe.run(
binary, feed=feeder.feed(data), fetch_list=[loss]
)
if not for_ci and idx % 20 == 0:
print('{}: {}'.format('loss', np.mean(loss_v)))
print('{}: {}'.format('loss', np.mean(loss_v)))
# 4 Convert
qt.convert(test_program, scope)
if not for_ci:
with fluid.scope_guard(scope):
fluid.io.save_inference_model(
'./infer_model',
['image', 'label'],
[loss],
exe,
test_program,
clip_extra=True,
)
def test_gpu_1(self):
if fluid.core.is_compiled_with_cuda():
self.quantize_program(
use_cuda=True,
seed=1,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True,
)
def test_gpu_2(self):
if fluid.core.is_compiled_with_cuda():
self.quantize_program(
use_cuda=True,
seed=1,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True,
)
def test_cpu_1(self):
self.quantize_program(
use_cuda=False,
seed=2,
activation_quant_type='abs_max',
weight_quant_type='abs_max',
for_ci=True,
)
def test_cpu_2(self):
self.quantize_program(
use_cuda=False,
seed=2,
activation_quant_type='moving_average_abs_max',
weight_quant_type='channel_wise_abs_max',
for_ci=True,
)
if __name__ == '__main__':
unittest.main()
......@@ -25,5 +25,4 @@ set_tests_properties(test_multi_precision_fp16_train PROPERTIES TIMEOUT 120)
if(APPLE)
set_tests_properties(test_model_cast_to_bf16 PROPERTIES TIMEOUT 300)
set_tests_properties(test_quantize_transpiler PROPERTIES TIMEOUT 300)
endif()
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
# you may obtain a copy of the license at
#
# http://www.apache.org/licenses/license-2.0
#
# unless required by applicable law or agreed to in writing, software
# distributed under the license is distributed on an "as is" basis,
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import numpy as np
import unittest
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.quantize.quantize_transpiler import _original_var_name
from paddle.fluid.contrib.quantize.quantize_transpiler import QuantizeTranspiler
paddle.enable_static()
def linear_fc(num):
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in range(num):
hidden = fluid.layers.fc(hidden, size=128, act='relu')
loss = paddle.nn.functional.cross_entropy(
input=hidden, label=label, reduction='none', use_softmax=False
)
loss = paddle.mean(loss)
return loss
def residual_block(num):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
stride=stride,
padding=padding,
act=None,
bias_attr=bias_attr,
)
return paddle.static.nn.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
hidden = data
for _ in range(num):
conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short))
fc = fluid.layers.fc(input=hidden, size=10)
loss = paddle.nn.functional.cross_entropy(
input=fc, label=label, reduction='none', use_softmax=False
)
loss = paddle.mean(loss)
return loss
def conv_net(img, label):
conv_pool_1 = fluid.nets.simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu",
)
conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu",
)
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss)
return avg_loss
class TestQuantizeTranspiler(unittest.TestCase):
def setUp(self):
# since quant_op and dequant_op are not ready, use cos and sin for test
self.weight_quant_op_type = 'fake_quantize_abs_max'
self.dequant_op_type = 'fake_dequantize_max_abs'
self.quantizable_op_and_inputs = {
'conv2d': ['Input', 'Filter'],
'depthwise_conv2d': ['Input', 'Filter'],
'mul': ['X', 'Y'],
}
self.quantizable_op_grad_and_inputs = {
'conv2d_grad': ['Input', 'Filter'],
'depthwise_conv2d_grad': ['Input', 'Filter'],
'mul_grad': ['X', 'Y'],
}
def check_program(self, program):
quantized_ops = {}
persistable_vars = [
v.name
for v in filter(lambda var: var.persistable, program.list_vars())
]
for block in program.blocks:
for idx, op in enumerate(block.ops):
# check forward
if op.type in self.quantizable_op_and_inputs:
for i, arg_name in enumerate(op.input_arg_names):
quant_op_type = (
self.weight_quant_op_type
if _original_var_name(arg_name) in persistable_vars
else self.act_quant_op_type
)
self.assertTrue(
arg_name.endswith('.quantized.dequantized')
)
if arg_name not in quantized_ops:
self.assertEqual(
block.ops[idx - 2 * i - 1].type,
self.dequant_op_type,
)
self.assertEqual(
block.ops[idx - 2 * i - 2].type, quant_op_type
)
quantized_ops[arg_name] = block.ops[idx - 2 * i - 2]
else:
op_idx = block.ops.index(quantized_ops[arg_name])
self.assertLess(op_idx, idx)
# check backward
if op.type in self.quantizable_op_grad_and_inputs:
for pname in self.quantizable_op_grad_and_inputs[op.type]:
arg_name = op.input(pname)[0]
self.assertTrue(
arg_name.endswith('.quantized.dequantized')
)
self.assertTrue(arg_name in quantized_ops)
def linear_fc_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = linear_fc(3)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
t = QuantizeTranspiler(activation_quantize_type=quant_type)
t.training_transpile(main)
self.check_program(main)
def test_linear_fc_quant_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.linear_fc_quant('abs_max')
def test_linear_fc_quant_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.linear_fc_quant('range_abs_max')
def residual_block_quant(self, quant_type):
main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
loss = residual_block(2)
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
t = QuantizeTranspiler(activation_quantize_type=quant_type)
t.training_transpile(main)
self.check_program(main)
def test_residual_block_abs_max(self):
self.act_quant_op_type = 'fake_quantize_abs_max'
self.residual_block_quant('abs_max')
def test_residual_block_range_abs_max(self):
self.act_quant_op_type = 'fake_quantize_range_abs_max'
self.residual_block_quant('range_abs_max')
def freeze_program(self, use_cuda, seed):
def build_program(main, startup, is_test):
main.random_seed = seed
startup.random_seed = seed
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32'
)
label = fluid.layers.data(
name='label', shape=[1], dtype='int64'
)
loss = conv_net(img, label)
if not is_test:
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
return [img, label], loss
main = fluid.Program()
startup = fluid.Program()
test_program = fluid.Program()
import random
random.seed(0)
np.random.seed(0)
feeds, loss = build_program(main, startup, False)
build_program(test_program, startup, True)
test_program = test_program.clone(for_test=True)
quant_type = 'range_abs_max' # 'range_abs_max' or 'abs_max'
quant_transpiler = QuantizeTranspiler(
activation_quantize_type=quant_type
)
quant_transpiler.training_transpile(main, startup)
quant_transpiler.training_transpile(test_program, startup)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
iters = 5
batch_size = 8
class_num = 10
exe.run(startup)
train_reader = paddle.batch(
paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
batch_size=batch_size,
)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=batch_size
)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
with fluid.program_guard(main):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(
program=main, feed=feeder.feed(data), fetch_list=[loss]
)
with fluid.program_guard(test_program):
test_data = next(test_reader())
w_var = fluid.framework._get_var(
'conv2d_1.w_0.quantized', test_program
)
# Testing during training
test_loss1, w_quant = exe.run(
program=test_program,
feed=feeder.feed(test_data),
fetch_list=[loss, w_var],
)
# Freeze the program for inference; the fc/conv weights are still float type.
quant_transpiler.freeze_program(test_program, place)
(test_loss2,) = exe.run(
program=test_program,
feed=feeder.feed(test_data),
fetch_list=[loss],
)
self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
w_freeze = np.array(
fluid.global_scope().find_var('conv2d_1.w_0').get_tensor()
)
# fail: -432.0 != -433.0, this is due to the calculation precision
# self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
# Convert parameter to 8-bit.
quant_transpiler.convert_to_int8(test_program, place)
# Save the 8-bit parameter and model file.
fluid.io.save_inference_model(
'model_8bit',
['image', 'label'],
[loss],
exe,
test_program,
clip_extra=True,
)
# Test whether the 8-bit parameter and model file can be loaded successfully.
[infer, feed, fetch] = fluid.io.load_inference_model(
'model_8bit', exe
)
# Check the loaded 8-bit weight.
w_8bit = np.array(
fluid.global_scope().find_var('conv2d_1.w_0.int8').get_tensor()
)
self.assertEqual(w_8bit.dtype, np.int8)
self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
def not_test_freeze_program_cuda(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.freeze_program(True, seed=1)
def not_test_freeze_program_cpu(self):
with fluid.unique_name.guard():
self.freeze_program(False, seed=2)
if __name__ == '__main__':
unittest.main()
......@@ -23,7 +23,7 @@ import paddle.distributed.fleet as fleet
import paddle.fluid as fluid
import paddle.nn as nn
from paddle.distributed.utils.launch_utils import find_free_ports, get_cluster
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.quantization import ImperativeQuantAware
def set_random_seed(seed, dp_id, rank_id):
......
......@@ -20,10 +20,6 @@ import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.contrib.slim.quantization import (
QuantizationFreezePass,
QuantizationTransformPass,
)
from paddle.fluid.executor import global_scope
from paddle.fluid.framework import (
IrGraph,
......@@ -32,6 +28,10 @@ from paddle.fluid.framework import (
convert_np_dtype_to_dtype_,
)
from paddle.fluid.initializer import NumpyArrayInitializer
from paddle.static.quantization import (
QuantizationFreezePass,
QuantizationTransformPass,
)
class TensorConfig:
......
......@@ -21,16 +21,16 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, Variable, core
from paddle.fluid.contrib.slim.quantization import (
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
from paddle.fluid.framework import IrGraph
from paddle.fluid.io import append_fetch_ops, prepend_feed_ops
from paddle.static.quantization import (
AddQuantDequantPass,
OutScaleForInferencePass,
OutScaleForTrainingPass,
QuantizationFreezePass,
QuantizationTransformPass,
)
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
from paddle.fluid.framework import IrGraph
from paddle.fluid.io import append_fetch_ops, prepend_feed_ops
class QuantDequantTest(unittest.TestCase):
......
......@@ -18,9 +18,9 @@ import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.framework import IrGraph, Program, program_guard
from paddle.fluid.tests.unittests.op_test import OpTestTool
from paddle.static.quantization import QuantizationTransformPass
paddle.enable_static()
......
......@@ -24,7 +24,7 @@ from PIL import Image
import paddle
import paddle.fluid as fluid
from paddle.dataset.common import download
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
from paddle.static.quantization import PostTrainingQuantization
paddle.enable_static()
......
......@@ -12,40 +12,41 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from ..fluid.contrib.slim.quantization.imperative.ptq_config import (
from .imperative.ptq_config import (
PTQConfig,
default_ptq_config,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
from .imperative.ptq_quantizer import (
BaseQuantizer,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
from .imperative.ptq_quantizer import (
AbsmaxQuantizer,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
from .imperative.ptq_quantizer import (
PerChannelAbsmaxQuantizer,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
from .imperative.ptq_quantizer import (
KLQuantizer,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
from .imperative.ptq_quantizer import (
HistQuantizer,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
from .imperative.ptq_quantizer import (
SUPPORT_ACT_QUANTIZERS,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_quantizer import (
from .imperative.ptq_quantizer import (
SUPPORT_WT_QUANTIZERS,
)
from ..fluid.contrib.slim.quantization.imperative.ptq_registry import (
from .imperative.ptq_registry import (
PTQRegistry,
)
from ..fluid.contrib.slim.quantization.imperative.ptq import ImperativePTQ
from ..fluid.contrib.slim.quantization.imperative.qat import (
from .imperative.ptq import (
ImperativePTQ,
)
from .imperative.qat import (
ImperativeQuantAware,
)
from .config import QuantConfig
from .base_quanter import BaseQuanter
from .factory import quanter
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,23 +13,24 @@
# limitations under the License.
from . import qat
from .qat import *
from .qat import ImperativeQuantAware
from . import ptq
from .ptq import *
from .ptq import ImperativePTQ
from . import ptq_config
from .ptq_config import *
from .ptq_config import PTQConfig, default_ptq_config
from . import ptq_quantizer
from .ptq_quantizer import *
from .ptq_quantizer import (
BaseQuantizer,
AbsmaxQuantizer,
PerChannelAbsmaxQuantizer,
KLQuantizer,
HistQuantizer,
SUPPORT_ACT_QUANTIZERS,
SUPPORT_WT_QUANTIZERS,
)
from . import ptq_registry
from .ptq_registry import *
__all__ = []
__all__ += qat.__all__
__all__ += ptq.__all__
__all__ += ptq_config.__all__
__all__ += ptq_quantizer.__all__
__all__ += ptq_registry.__all__
from .ptq_registry import PTQRegistry
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,8 +13,10 @@
# limitations under the License.
import copy
import paddle
import paddle.nn as nn
from . import utils
......@@ -66,7 +68,7 @@ def fuse_layers(model, layers_to_fuse, inplace=False):
Return
fused_model(paddle.nn.Layer): The fused model.
'''
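# Usage sketch (assumes layers_to_fuse is a list of sublayer-name groups;
# the names below are illustrative):
#   fused = fuse_layers(model, [['conv1', 'bn1']], inplace=False)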
if inplace == False:
if inplace is False:
model = copy.deepcopy(model)
for layers in layers_to_fuse:
_fuse_layers(model, layers)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,24 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import copy
import logging
import os
import numpy as np
import paddle
import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from . import fuse_utils
from . import utils
from . import ptq_hooks
from . import ptq_config
from . import ptq_quantizer
from ...static.log_helper import get_logger
from ...static.quantization.utils import (
_get_input_name_index,
_get_op_input_var_names,
_get_op_output_var_names,
_get_output_name_index,
)
from . import fuse_utils, ptq_config, ptq_hooks, ptq_quantizer, utils
from .ptq_registry import PTQRegistry
__all__ = ['ImperativePTQ']
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
......@@ -165,8 +168,8 @@ class ImperativePTQ:
infer_program,
feed_target_names,
fetch_targets,
] = paddle.fluid.io.load_inference_model(
dirname=dirname,
] = paddle.static.load_inference_model(
path_prefix=dirname,
executor=exe,
model_filename=model_filename,
params_filename=params_filename,
......@@ -178,14 +181,23 @@ class ImperativePTQ:
self._remove_scale_op(infer_program)
# Save final program
paddle.fluid.io.save_inference_model(
dirname=dirname,
feeded_var_names=feed_target_names,
target_vars=fetch_targets,
model_name = None
if model_filename is None:
model_name = "model"
elif model_filename.endswith(".pdmodel"):
model_name = model_filename.rsplit(".", 1)[0]
else:
model_name = model_filename
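# paddle.static.save_inference_model expects a path prefix rather than a
# directory, so build it from dirname plus the model-file stem derived above.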
path_prefix = os.path.join(dirname, model_name)
feed_vars = [
infer_program.global_block().var(name) for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets,
executor=exe,
main_program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename,
program=infer_program.clone(),
)
if is_dynamic_mode:
......@@ -302,7 +314,7 @@ class ImperativePTQ:
) and PTQRegistry.is_simulated_quant_layer(sub_layer):
quant_config = sub_layer._quant_config
assert quant_config.enable_in_act_quantizer == True
assert quant_config.enable_in_act_quantizer is True
wt_quantizer = quant_config.wt_quantizer
in_act_quantizer = quant_config.in_act_quantizer
......@@ -376,7 +388,7 @@ class ImperativePTQ:
None
"""
for op in utils.program_all_ops(program):
for in_var_name in utils._get_op_input_var_names(op):
for in_var_name in _get_op_input_var_names(op):
previous_op = utils.find_previous_op(op.block, in_var_name)
if previous_op is None:
continue
......@@ -388,20 +400,16 @@ class ImperativePTQ:
attr_name = previous_op.output('OutScale')[0]
in_threshold = utils.load_variable_data(scope, attr_name)
in_threshold = utils.fp_numpy_to_naive(in_threshold)
argname, index = utils._get_input_name_index(
op, in_var_name
)
argname, index = _get_input_name_index(op, in_var_name)
op._set_attr(
argname + str(index) + "_threshold", in_threshold
)
op._set_attr("with_quant_attr", True)
else:
for out_var_name in utils._get_op_output_var_names(
previous_op
):
for out_var_name in _get_op_output_var_names(previous_op):
if out_var_name != in_var_name:
continue
argname, index = utils._get_output_name_index(
argname, index = _get_output_name_index(
previous_op, out_var_name
)
attr_name = argname + str(index) + "_threshold"
......@@ -409,9 +417,7 @@ class ImperativePTQ:
continue
threshold = previous_op.attr(attr_name)
argname, index = utils._get_input_name_index(
op, in_var_name
)
argname, index = _get_input_name_index(op, in_var_name)
attr_name = argname + str(index) + "_threshold"
op._set_attr(attr_name, threshold)
op._set_attr("with_quant_attr", True)
......@@ -453,10 +459,10 @@ class ImperativePTQ:
continue
next_op = next_ops[0]
argname, index = utils._get_output_name_index(op, out_var_name)
argname, index = _get_output_name_index(op, out_var_name)
old_attr_name = argname + str(index) + "_threshold"
argname, index = utils._get_output_name_index(
argname, index = _get_output_name_index(
next_op, next_op.output("Out")[0]
)
new_attr_name = argname + str(index) + "_threshold"
......@@ -478,7 +484,7 @@ class ImperativePTQ:
@staticmethod
def _is_skip_layer(layer):
return hasattr(layer, "skip_quant") and layer.skip_quant == True
return hasattr(layer, "skip_quant") and layer.skip_quant is True
@staticmethod
def _is_quant_layer(layer):
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import abc
import copy
import paddle
from .ptq_quantizer import *
__all__ = ['PTQConfig', 'default_ptq_config']
from .ptq_quantizer import (
SUPPORT_ACT_QUANTIZERS,
SUPPORT_WT_QUANTIZERS,
KLQuantizer,
PerChannelAbsmaxQuantizer,
)
class PTQConfig:
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,12 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import math
import numpy as np
from . import ptq_config
from .ptq_registry import PTQRegistry
def quant_forward_post_hook(layer, inputs, outputs):
"""
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,24 +13,14 @@
# limitations under the License.
import abc
import copy
import math
import numpy as np
import paddle
from ...static.quantization.cal_kl_threshold import cal_kl_threshold
from . import utils
from ..cal_kl_threshold import cal_kl_threshold
__all__ = [
'BaseQuantizer',
'AbsmaxQuantizer',
'PerChannelAbsmaxQuantizer',
'KLQuantizer',
'HistQuantizer',
'SUPPORT_ACT_QUANTIZERS',
'SUPPORT_WT_QUANTIZERS',
]
def abs_max_value(tensor):
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -14,8 +14,6 @@
import paddle
__all__ = ['PTQRegistry']
class LayerInfo:
"""
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,35 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import logging
import numpy as np
import sys
import os
import warnings
import paddle
import paddle.nn as nn
import paddle.nn.quant.quant_layers as quant_layers
from paddle.fluid import dygraph, core, framework, unique_name
from paddle.fluid.framework import IrGraph
from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Constant
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.fluid.io import load_inference_model, save_inference_model
from ..quantization_pass import ReplaceFakeQuantDequantPass, QuantWeightPass
from paddle.fluid.log_helper import get_logger
from .. import quantization_pass
from ..utils import move_persistable_var_to_global_block
from . import utils
from . import fuse_utils
__all__ = ['ImperativeQuantAware']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
from paddle.framework import core
from ...static.quantization.quantization_pass import (
QuantWeightPass,
ReplaceFakeQuantDequantPass,
)
from ...static.quantization.utils import (
_get_input_name_index,
_get_op_input_var_names,
_get_output_name_index,
move_persistable_var_to_global_block,
)
from . import fuse_utils, utils
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
def lazy_import_fleet(layer_name_map, fake_quant_input_layers):
......@@ -147,7 +139,7 @@ class ImperativeQuantAware:
.. code-block:: python
import paddle
from paddle.fluid.contrib.slim.quantization \
from paddle.static.quantization \
import ImperativeQuantAware
from paddle.vision.models \
import resnet
......@@ -178,7 +170,7 @@ class ImperativeQuantAware:
.. code-block:: python
import paddle
from paddle.fluid.contrib.slim.quantization \
from paddle.static.quantization \
import ImperativeQuantAware
class ImperativeModel(paddle.nn.Layer):
......@@ -256,7 +248,7 @@ class ImperativeQuantAware:
.. code-block:: python
import paddle
from paddle.fluid.contrib.slim.quantization \
from paddle.static.quantization \
import ImperativeQuantAware
class ImperativeModel(paddle.nn.Layer):
......@@ -288,8 +280,8 @@ class ImperativeQuantAware:
imperative_qat.quantize(model)
"""
assert isinstance(
model, dygraph.Layer
), "The model must be the instance of dygraph.Layer."
model, paddle.nn.Layer
), "The model must be the instance of paddle.nn.Layer."
if self.fuse_conv_bn:
fuse_utils.fuse_conv_bn(model)
......@@ -376,7 +368,7 @@ class ImperativeQuantizeInputs:
), "activation_bits should be 1, 2,... or 16."
layer_check = lambda method: method is None or issubclass(
method, dygraph.layers.Layer
method, paddle.nn.Layer
)
assert layer_check(
weight_preprocess_layer
......@@ -417,13 +409,13 @@ class ImperativeQuantizeInputs:
"""
assert isinstance(
model, dygraph.Layer
), "The model must be the instance of dygraph.Layer."
model, paddle.nn.Layer
), "The model must be the instance of paddle.nn.Layer."
for name, cur_layer in model.named_sublayers():
if not isinstance(cur_layer, self._quantizable_layer_type) or (
hasattr(cur_layer, "skip_quant")
and cur_layer.skip_quant == True
and cur_layer.skip_quant is True
):
continue
......@@ -480,8 +472,8 @@ class ImperativeQuantizeOutputs:
None
"""
assert isinstance(
model, dygraph.Layer
), "The model must be the instance of dygraph.Layer."
model, paddle.nn.Layer
), "The model must be the instance of paddle.nn.Layer."
for cur_name, cur_layer in model.named_sublayers():
if '_act_preprocess' in cur_name:
......@@ -535,8 +527,8 @@ class ImperativeQuantizeOutputs:
None
"""
assert isinstance(
model, dygraph.Layer
), "The model must be the instance of dygraph.Layer."
model, paddle.nn.Layer
), "The model must be the instance of paddle.nn.Layer."
paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)
......@@ -546,8 +538,8 @@ class ImperativeQuantizeOutputs:
paddle.enable_static()
place = core.CPUPlace()
scope = global_scope()
exe = Executor(place)
scope = paddle.static.global_scope()
exe = paddle.static.Executor(place)
dirname = os.path.dirname(path)
basename = os.path.basename(path)
......@@ -558,8 +550,8 @@ class ImperativeQuantizeOutputs:
infer_program,
feed_target_names,
fetch_targets,
] = load_inference_model(
dirname=dirname,
] = paddle.static.load_inference_model(
dirname,
executor=exe,
model_filename=model_filename,
params_filename=params_filename,
......@@ -600,14 +592,23 @@ class ImperativeQuantizeOutputs:
move_persistable_var_to_global_block(infer_program)
save_inference_model(
dirname=dirname,
feeded_var_names=feed_target_names,
target_vars=fetch_targets,
model_name = None
if model_filename is None:
model_name = "model"
elif model_filename.endswith(".pdmodel"):
model_name = model_filename.rsplit(".", 1)[0]
else:
model_name = model_filename
path_prefix = os.path.join(dirname, model_name)
feed_vars = [
infer_program.global_block().var(name) for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets,
executor=exe,
main_program=infer_program.clone(),
model_filename=model_filename,
params_filename=params_filename,
program=infer_program.clone(),
clip_extra=clip_extra,
)
......@@ -619,7 +620,7 @@ class ImperativeQuantizeOutputs:
Whether the layer needs to calculate output scales.
"""
# exclude fake_quant ops in quant_layers file
if not isinstance(layer, dygraph.Layer):
if not isinstance(layer, paddle.nn.Layer):
return False
if self._onnx_format:
......@@ -660,7 +661,7 @@ class ImperativeQuantizeOutputs:
target_ops.append(op)
for op in target_ops:
for in_var_name in utils._get_op_input_var_names(op):
for in_var_name in _get_op_input_var_names(op):
previous_op = utils.find_previous_op(op.block, in_var_name)
if previous_op is not None and (
......@@ -670,9 +671,7 @@ class ImperativeQuantizeOutputs:
scale_name = previous_op.output('OutScale')[0]
in_scale = utils.load_variable_data(scope, scale_name)
in_scale = utils.fp_numpy_to_naive(in_scale)
argname, index = utils._get_input_name_index(
op, in_var_name
)
argname, index = _get_input_name_index(op, in_var_name)
op._set_attr(
argname + str(index) + "_threshold", in_scale
)
......@@ -697,7 +696,7 @@ class ImperativeQuantizeOutputs:
out_scale = utils.fp_numpy_to_naive(out_scale)
if previous_op.type != "feed":
res = utils._get_output_name_index(previous_op, in_var_name)
res = _get_output_name_index(previous_op, in_var_name)
if res is not None:
argname, index = res
previous_op._set_attr(
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,19 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import paddle
import paddle.nn.quant.quant_layers as quant_layers
from ..utils import (
_get_op_input_var_names,
_get_op_output_var_names,
_get_output_name_index,
_get_input_name_index,
)
layer_name_map = {
'Conv2DTranspose': paddle.nn.Conv2DTranspose,
'Conv2D': paddle.nn.Conv2D,
......@@ -42,7 +34,6 @@ layer_name_map = {
'Softmax': paddle.nn.Softmax,
'Swish': paddle.nn.Swish,
'Tanh': paddle.nn.Tanh,
'Hardswish': paddle.nn.Hardswish,
'BatchNorm': paddle.nn.BatchNorm,
'GroupNorm': paddle.nn.GroupNorm,
'LayerNorm': paddle.nn.LayerNorm,
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,7 +12,42 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from . import quantize_transpiler
from .quantize_transpiler import *
import logging
__all__ = quantize_transpiler.__all__
def get_logger(name, level, fmt=None):
"""
Get a logger from the logging module with the given name, level and format,
without calling logging.basicConfig. Setting basicConfig inside paddle would
override the user's own basicConfig after importing paddle.
Args:
name (str): The logger name.
level (logging.LEVEL): The base level of the logger
fmt (str): Format of logger output
Returns:
logging.Logger: logging logger with given settings
Examples:
.. code-block:: python
import paddle
import logging
logger = paddle.static.log_helper.get_logger(__name__, logging.INFO,
fmt='%(asctime)s-%(levelname)s: %(message)s')
"""
logger = logging.getLogger(name)
logger.setLevel(level)
handler = logging.StreamHandler()
if fmt:
formatter = logging.Formatter(fmt=fmt, datefmt='%a %b %d %H:%M:%S')
handler.setFormatter(formatter)
logger.addHandler(handler)
# Stop propagation: propagating to ancestor loggers may print the log multiple times.
logger.propagate = False
return logger
......@@ -12,50 +12,55 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
QuantizationTransformPass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
QuantizationFreezePass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
ConvertToInt8Pass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
TransformForMobilePass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
OutScaleForTrainingPass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
OutScaleForInferencePass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
AddQuantDequantPass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
ReplaceFakeQuantDequantPass,
)
from ...fluid.contrib.slim.quantization.quantization_pass import QuantWeightPass
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
QuantWeightPass,
)
from .quantization_pass import (
QuantizationTransformPassV2,
)
from ...fluid.contrib.slim.quantization.quantization_pass import (
from .quantization_pass import (
AddQuantDequantPassV2,
)
from ...fluid.contrib.slim.quantization.quant_int8_mkldnn_pass import (
from .quantization_pass import (
AddQuantDequantForInferencePass,
)
from .quant_int8_mkldnn_pass import (
QuantInt8MkldnnPass,
)
from ...fluid.contrib.slim.quantization.quant2_int8_mkldnn_pass import (
from .quant2_int8_mkldnn_pass import (
Quant2Int8MkldnnPass,
)
from ...fluid.contrib.slim.quantization.post_training_quantization import (
from .post_training_quantization import (
PostTrainingQuantization,
)
from ...fluid.contrib.slim.quantization.post_training_quantization import (
from .post_training_quantization import (
PostTrainingQuantizationProgram,
)
from ...fluid.contrib.slim.quantization.post_training_quantization import (
from .post_training_quantization import (
WeightQuantization,
)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,25 +12,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import time
import sys
import logging
import paddle
import sys
import time
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.static as static
from ....log_helper import get_logger
from ..log_helper import get_logger
from .utils import (
_channelwise_quant_axis1_ops,
bias_correction_w,
calculate_quant_cos_error,
dequant_tensor,
load_variable_data,
quant_tensor,
set_variable_data,
stable_sigmoid,
quant_tensor,
dequant_tensor,
_channelwise_quant_axis1_ops,
calculate_quant_cos_error,
bias_correction_w,
)
_logger = get_logger(
......@@ -42,7 +42,7 @@ ZETA = 1.1
def compute_soft_rounding(alpha_v):
return fluid.layers.clip(
return paddle.clip(
paddle.nn.functional.sigmoid(alpha_v) * (ZETA - GAMMA) + GAMMA,
min=0,
max=1,
......@@ -83,11 +83,9 @@ class AdaRoundLoss:
return round_loss
round_loss = paddle.static.nn.cond(
round_loss = static.nn.cond(
warm_start,
lambda: fluid.layers.fill_constant(
shape=[1], dtype='float32', value=0.0
),
lambda: paddle.full(shape=[1], dtype='float32', fill_value=0.0),
round_loss_fn,
)
......@@ -151,7 +149,7 @@ class AdaRound:
shape=alpha.shape,
dtype="float32",
name=var_name + ".alpha",
default_initializer=fluid.initializer.NumpyArrayInitializer(alpha),
default_initializer=paddle.nn.initializer.Assign(alpha),
)
def _calculate_output_with_adarounded_weights(
......@@ -258,12 +256,12 @@ def run_adaround(
fetch_op_name = quant_op_out_name
# build adaround program
exec_strategy = fluid.ExecutionStrategy()
exec_strategy = static.ExecutionStrategy()
exec_strategy.num_iteration_per_drop_scope = 1
startup_program = fluid.Program()
train_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
with fluid.unique_name.guard():
startup_program = static.Program()
train_program = static.Program()
with static.program_guard(train_program, startup_program):
with paddle.utils.unique_name.guard():
# initialize adaround
adaround = AdaRound(
scale,
......@@ -273,21 +271,21 @@ def run_adaround(
weight_op_type=weight_op_type,
num_iterations=num_iterations,
)
orig_out_tensor = fluid.data(
orig_out_tensor = static.data(
name='orig_out_tensor',
shape=fp32_fetch_list.shape,
shape=(-1,) + fp32_fetch_list.shape,
dtype='float32',
)
adaround_out_tensor = fluid.data(
adaround_out_tensor = static.data(
name='adaround_out_tensor',
shape=fp32_fetch_list.shape,
shape=(-1,) + fp32_fetch_list.shape,
dtype='float32',
)
beta_tensor = fluid.data(
name='beta', shape=[1], dtype='float32'
beta_tensor = static.data(
name='beta', shape=[-1, 1], dtype='float32'
)
warm_start_tensor = fluid.data(
name='warm_start', shape=[1], dtype='bool'
warm_start_tensor = static.data(
name='warm_start', shape=[-1, 1], dtype='bool'
)
train_fetches_loss = adaround.get_loss(
......@@ -296,7 +294,7 @@ def run_adaround(
adaround_out_tensor,
orig_out_tensor,
)
optimizer = fluid.optimizer.Adam(learning_rate=lr)
optimizer = paddle.optimizer.Adam(learning_rate=lr)
loss = train_fetches_loss['loss']
optimizer.minimize(loss)
exe.run(startup_program)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -14,15 +14,15 @@
import logging
import math
import numpy as np
from ....log_helper import get_logger
from ..log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
__all__ = ['cal_kl_threshold']
def expand_quantized_bins(quantized_bins, reference_bins):
'''
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,43 +12,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import re
import math
import shutil
import logging
import numpy as np
try:
from tqdm import tqdm
except:
from .utils import tqdm
from inspect import isgeneratorfunction
from .... import io
from .... import core
from .... import reader
from .... import framework
from .... import unique_name
from ....executor import global_scope, Executor
from ....framework import IrGraph
from ....log_helper import get_logger
from paddle.fluid.framework import IrGraph, _get_var
from ... import io, static
from ...fluid import reader
from ...framework import core
from ...utils import unique_name
from ..log_helper import get_logger
from . import utils
from .adaround import run_adaround
from .cal_kl_threshold import cal_kl_threshold
from .quantization_pass import (
AddQuantDequantPass,
AddQuantDequantPassV2,
QuantizationFreezePass,
QuantizationTransformPass,
QuantizationTransformPassV2,
QuantizationFreezePass,
QuantWeightPass,
AddQuantDequantPass,
AddQuantDequantPassV2,
)
from .cal_kl_threshold import cal_kl_threshold
from .adaround import run_adaround
from . import utils
__all__ = [
'PostTrainingQuantization',
'WeightQuantization',
'PostTrainingQuantizationProgram',
]
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
......@@ -156,10 +150,10 @@ class PostTrainingQuantization:
Constructor.
Args:
executor(fluid.Executor): The executor to load, run and save the
executor(static.Executor): The executor to load, run and save the
quantized model.
scope(fluid.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by global_scope().
scope(static.Scope, optional): The scope of the program, use it to load
and save variables. If scope=None, get scope by static.global_scope().
model_dir(str): The path of the fp32 model that will be quantized,
and the model and params files are under the path.
model_filename(str, optional): The name of file to load the inference
......@@ -245,10 +239,10 @@ class PostTrainingQuantization:
Examples:
.. code-block:: python
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
import paddle.static as static
from paddle.static.quantization import PostTrainingQuantization
exe = fluid.Executor(fluid.CPUPlace())
exe = static.Executor(paddle.CPUPlace())
model_dir = path/to/fp32_model_params
# set model_filename as None when the filename is __model__,
# otherwise set it as the real filename
......@@ -344,7 +338,7 @@ class PostTrainingQuantization:
# Save input params
self._bias_correction = bias_correction
self._executor = executor
self._scope = global_scope() if scope is None else scope
self._scope = static.global_scope() if scope is None else scope
self._model_dir = model_dir
self._model_filename = model_filename
self._params_filename = params_filename
......@@ -537,22 +531,29 @@ class PostTrainingQuantization:
Args:
save_model_path(str): The path to save the quantized model.
model_filename(str, optional): If the model_filename is None,
save the model to '__model__'. Otherwise, save the model
to the specified filename. Default: None.
params_filename(str, optional): If the params_filename is None,
save params to separted files. Otherwise, save all params
to the specified filename.
save the model to 'model.pdmodel' and 'model.pdiparams'. Otherwise, save the model to 'model_name.pdmodel' and
'model_name.pdiparams'. Default: None.
Returns:
None
'''
io.save_inference_model(
dirname=save_model_path,
model_filename=model_filename,
params_filename=params_filename,
feeded_var_names=self._feed_list,
target_vars=self._fetch_list,
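# Derive the path prefix expected by static.save_inference_model: fall back to
# "model" when no filename is given, and strip a trailing ".pdmodel" so the
# files are saved as '<prefix>.pdmodel' and '<prefix>.pdiparams'.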
model_name = None
if model_filename is None:
model_name = "model"
elif model_filename.endswith(".pdmodel"):
model_name = model_filename.rsplit(".", 1)[0]
else:
model_name = model_filename
path_prefix = os.path.join(save_model_path, model_name)
feed_vars = [
self._program.global_block().var(name) for name in self._feed_list
]
static.save_inference_model(
path_prefix,
feed_vars,
self._fetch_list,
executor=self._executor,
main_program=self._program,
program=self._program,
clip_extra=self._clip_extra,
)
_logger.info("The quantized model is saved in " + save_model_path)
......@@ -567,8 +568,8 @@ class PostTrainingQuantization:
self._program,
self._feed_list,
self._fetch_list,
] = io.load_inference_model(
dirname=self._model_dir,
] = static.load_inference_model(
self._model_dir,
executor=self._executor,
model_filename=self._model_filename,
params_filename=self._params_filename,
......@@ -578,7 +579,7 @@ class PostTrainingQuantization:
self._optimize_fp32_model()
feed_vars = [
framework._get_var(str(var_name), self._program)
_get_var(str(var_name), self._program)
for var_name in self._feed_list
]
......@@ -1632,17 +1633,17 @@ class WeightQuantization:
# Load model
place = core.CPUPlace()
exe = Executor(place)
scope = global_scope()
[infer_program, feed_list, fetch_list] = io.load_inference_model(
dirname=self._model_dir,
exe = static.Executor(place)
scope = static.global_scope()
[infer_program, feed_list, fetch_list] = static.load_inference_model(
self._model_dir,
executor=exe,
model_filename=self._model_filename,
params_filename=self._params_filename,
)
# Clone and save fp16 weights
save_program = framework.Program()
save_program = static.Program()
save_block = save_program.global_block()
save_var_map = {}
......@@ -1723,10 +1724,10 @@ class WeightQuantization:
"""
# Load model
place = core.CPUPlace()
exe = Executor(place)
scope = global_scope()
[program, feed_list, fetch_list] = io.load_inference_model(
dirname=self._model_dir,
exe = static.Executor(place)
scope = static.global_scope()
[program, feed_list, fetch_list] = static.load_inference_model(
self._model_dir,
executor=exe,
model_filename=self._model_filename,
params_filename=self._params_filename,
......@@ -1758,15 +1759,22 @@ class WeightQuantization:
self._weight_channel_wise_abs_max_quantization(
scope, place, weight_bits, op, var_name, for_test
)
io.save_inference_model(
dirname=save_model_dir,
feeded_var_names=feed_list,
target_vars=fetch_list,
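# Resolve the save prefix in the same way as save_quantized_model: default to
# "model" and drop a trailing ".pdmodel" suffix before building path_prefix.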
model_name = None
if save_model_filename is None:
model_name = "model"
elif save_model_filename.endswith(".pdmodel"):
model_name = save_model_filename.rsplit(".", 1)[0]
else:
model_name = save_model_filename
path_prefix = os.path.join(save_model_dir, model_name)
feed_vars = [program.global_block().var(name) for name in feed_list]
static.save_inference_model(
path_prefix,
feed_vars,
fetch_list,
executor=exe,
main_program=program,
model_filename=save_model_filename,
params_filename=save_params_filename,
program=program,
)
def _weight_abs_max_quantization(
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,11 +13,9 @@
# limitations under the License.
import numpy as np
from .... import core
from ....framework import IrGraph
from ....framework import _get_paddle_place
__all__ = ['Quant2Int8MkldnnPass']
from ...fluid.framework import IrGraph
from ...framework import _get_paddle_place, core
OpRole = core.op_proto_and_checker_maker.OpRole
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,12 +13,9 @@
# limitations under the License.
import numpy as np
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
from ....framework import _get_paddle_place
__all__ = ['QuantInt8MkldnnPass']
from ...fluid.framework import IrGraph
from ...framework import _get_paddle_place
class QuantInt8MkldnnPass:
......@@ -40,23 +37,23 @@ class QuantInt8MkldnnPass:
def __init__(self, _scope=None, _place=None):
r"""
Args:
scope(fluid.Scope): scope is used to initialize the new parameters.
place(fluid.CPUPlace|str): place is used to initialize the new parameters.
scope(static.Scope): scope is used to initialize the new parameters.
place(static.CPUPlace|str): place is used to initialize the new parameters.
When it is a string, it can only be 'cpu'.
Examples:
.. code-block:: python
# The original graph will be rewritten.
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization \
import paddle.static as static
from paddle.static.quantization \
import QuantInt8MkldnnPass
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
from paddle.framework import core
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False)
place = fluid.CPUPlace()
mkldnn_pass = QuantInt8MkldnnPass(fluid.global_scope(),
graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = static.CPUPlace()
mkldnn_pass = QuantInt8MkldnnPass(static.global_scope(),
place)
mkldnn_pass.apply(graph)
"""
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -13,39 +13,21 @@
# limitations under the License.
import collections
import numpy as np
try:
from tqdm import tqdm
except:
from .utils import tqdm
from .... import core
from ....framework import IrGraph
from ....framework import IrNode
from ....framework import Operator
from .... import unique_name
from ....framework import Program, program_guard, default_startup_program
from ....data import data
from ....executor import scope_guard
from ....framework import _get_paddle_place
from . import utils
import paddle
__all__ = [
'QuantizationTransformPass',
'QuantizationFreezePass',
'ConvertToInt8Pass',
'TransformForMobilePass',
'OutScaleForTrainingPass',
'OutScaleForInferencePass',
'AddQuantDequantPass',
'QuantizationTransformPassV2',
'AddQuantDequantPassV2',
'ReplaceFakeQuantDequantPass',
'QuantWeightPass',
'AddQuantDequantForInferencePass',
]
from ...fluid.framework import IrGraph, IrNode
from ...framework import _get_paddle_place, core
from ...static import Program, data, program_guard, scope_guard
from ...utils import unique_name
from . import utils
_fake_quant_op_list = [
'fake_quantize_abs_max',
......@@ -137,10 +119,10 @@ class QuantizationTransformPass:
Constructor.
Args:
scope(fluid.Scope): When activation use 'range_abs_max' as the quantize
scope(static.Scope): When activation use 'range_abs_max' as the quantize
type, this pass will create some new parameters. The scope is used to
initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to initialize new
place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
parameters described above. If it's a string, it can be ``cpu`` or ``gpu:x``,
where ``x`` is the index of the GPUs.
weight_bits(int): quantization bit number for weights,
......@@ -197,15 +179,15 @@ class QuantizationTransformPass:
Examples:
.. code-block:: python
# The original graph will be rewritten.
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization \
import paddle.static as static
from paddle.static.quantization \
import QuantizationTransformPass
from paddle.fluid.contrib.slim.graph import IrGraph
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
place = fluid.CPUPlace()
transform_pass = QuantizationTransformPass(fluid.global_scope(),
graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace()
transform_pass = QuantizationTransformPass(static.global_scope(),
place)
transform_pass.apply(graph)
"""
......@@ -1094,8 +1076,8 @@ class QuantizationFreezePass:
and weight will be scaled offline.
Args:
scope(fluid.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the weight tensors.
scope(static.Scope): scope is used to get the weight tensor values.
place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the weight tensors.
If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the index of the GPUs.
bias_correction(bool): whether use bias correction for post-training quantization.
https://arxiv.org/abs/1810.05723.
......@@ -1190,7 +1172,7 @@ class QuantizationFreezePass:
)
quantized_param_v = np.round(quantized_param_v)
# Weight bias correction
if self._bias_correction == True:
if self._bias_correction is True:
quantized_param_v = utils.bias_correction_w(
param_v,
quantized_param_v,
......@@ -1459,8 +1441,8 @@ class ConvertToInt8Pass:
Convert the weights into int8_t type.
Args:
scope(fluid.Scope): scope is used to get the weight tensor values.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to restore the
scope(static.Scope): scope is used to get the weight tensor values.
place(static.CPUPlace|static.CUDAPlace|str): place is used to restore the
8-bit weight tensors. If it's a string, it can be ``cpu`` or ``gpu:x``,
where ``x`` is the index of the GPUs.
quantizable_op_type(list[str]): This input param will be removed later. The pass
......@@ -1602,8 +1584,8 @@ class OutScaleForTrainingPass:
These output scales may be used by TensorRT or other inference engines.
Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): The place is used to initialize new parameters.
scope(static.Scope): The scope is used to initialize these new parameters.
place(static.CPUPlace|static.CUDAPlace|str): The place is used to initialize new parameters.
If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the
index of the GPUs.
moving_rate(float): The decay coefficient of moving average. The default value is 0.9.
......@@ -1764,7 +1746,7 @@ class OutScaleForInferencePass:
These output scales may be used by TensorRT or other inference engines.
Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
scope(static.Scope): The scope is used to initialize these new parameters.
"""
self._scope = scope
self._teller_set = utils.QUANT_SUPPORTED_OP_TYPE_LIST
......@@ -1856,8 +1838,8 @@ class AddQuantDequantPass:
Constructor.
Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
place(fluid.CPUPlace|fluid.CUDAPlace|str): place is used to initialize new
scope(static.Scope): The scope is used to initialize these new parameters.
place(static.CPUPlace|static.CUDAPlace|str): place is used to initialize new
parameters described above. If ``place`` is a string, it can be ``cpu``
or ``gpu:x``, where ``x`` is the index of the GPUs.
moving_rate(float, optional): the param for 'quant_dequant_moving_average_abs_max'
......@@ -2452,12 +2434,12 @@ class QuantizationTransformPassV2(QuantizationTransformPass):
.. code-block:: python
# The original graph will be rewritten.
import paddle
from paddle.fluid.contrib.slim.quantization \
from paddle.static.quantization \
import QuantizationTransformPassV2
from paddle.fluid.contrib.slim.graph import IrGraph
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace()
scope = paddle.static.global_scope()
transform_pass = QuantizationTransformPassV2(scope, place)
......@@ -2810,12 +2792,12 @@ class AddQuantDequantPassV2:
.. code-block:: python
# The original graph will be rewritten.
import paddle
from paddle.fluid.contrib.slim.quantization \
from paddle.static.quantization \
import AddQuantDequantPassV2
from paddle.fluid.contrib.slim.graph import IrGraph
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace()
scope = paddle.static.global_scope()
add_quant_dequant_pass = AddQuantDequantPassV2(scope, place)
......@@ -2977,12 +2959,12 @@ class ReplaceFakeQuantDequantPass:
.. code-block:: python
# The original graph will be rewritten.
import paddle
from paddle.fluid.contrib.slim.quantization \
from paddle.static.quantization \
import ReplaceFakeQuantDequantPass
from paddle.fluid.contrib.slim.graph import IrGraph
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace()
scope = paddle.static.global_scope()
replace_pass = ReplaceFakeQuantDequantPass(scope, place)
......@@ -3133,12 +3115,12 @@ class QuantWeightPass:
.. code-block:: python
# The original graph will be rewritten.
import paddle
from paddle.fluid.contrib.slim.quantization \
from paddle.static.quantization \
import QuantWeightPass
from paddle.fluid.contrib.slim.graph import IrGraph
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.framework import core
graph = IrGraph(core.Graph(program.desc), for_test=False)
graph = IrGraph(core.Graph(paddle.static.Program().desc), for_test=False)
place = paddle.CPUPlace()
scope = paddle.static.global_scope()
quant_weight_pass = QuantWeightPass(scope, place)
......@@ -3207,7 +3189,7 @@ class QuantWeightPass:
bits_length,
onnx_format=True,
)
if self._bias_correction == True:
if self._bias_correction is True:
quantized_param_v = utils.bias_correction_w(
param_v,
quantized_param_v,
......@@ -3264,7 +3246,7 @@ class AddQuantDequantForInferencePass:
def __init__(self, scope, place, quant_bits=8):
"""
Args:
scope(fluid.Scope): The scope is used to initialize these new parameters.
scope(static.Scope): The scope is used to initialize these new parameters.
place(paddle.CPUPlace|paddle.CUDAPlace|str): place is used to restore the weight tensors.
If it's a string, it can be ``cpu`` or ``gpu:x``, where ``x`` is the index of the GPUs.
quant_bits(int, optional): quantization bit number for weight. Default is 8.
......
......@@ -250,7 +250,6 @@ if(WIN32)
list(REMOVE_ITEM TEST_OPS test_post_training_quantization_lstm_model)
list(REMOVE_ITEM TEST_OPS test_imperative_ptq)
list(REMOVE_ITEM TEST_OPS test_weight_quantization_mobilenetv1)
list(REMOVE_ITEM TEST_OPS test_quantize_transpiler_v2)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_amp)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_lsq)
list(REMOVE_ITEM TEST_OPS test_imperative_qat_matmul)
......
......@@ -91,17 +91,18 @@ Having gathered all the data needed for quantization we apply the `cpu_quantize_
The code snippet below shows how the `Quant2Int8MkldnnPass` can be applied to a model graph:
```python
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass
import paddle
import paddle.static as static
from paddle.static.quantization import Quant2Int8MkldnnPass
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
from paddle.framework import core
# Create the IrGraph by Program
graph = IrGraph(core.Graph(fluid.Program().desc), for_test=False)
place = fluid.CPUPlace()
graph = IrGraph(core.Graph(static.Program().desc), for_test=False)
place = paddle.CPUPlace()
# Convert the IrGraph to MKL-DNN supported INT8 IrGraph using the
# Quant2Int8MkldnnPass. It requires a list of operators to be quantized
mkldnn_pass = Quant2Int8MkldnnPass({'conv2d', 'pool2d'}, fluid.global_scope(), place, fluid.core, False)
mkldnn_pass = Quant2Int8MkldnnPass({'conv2d', 'pool2d'}, static.global_scope(), place, core, False)
# Apply Quant2Int8MkldnnPass to IrGraph
mkldnn_pass.apply(graph)
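
# A minimal follow-up sketch, assuming `feed_vars` and `fetch_targets` come from
# the model you actually loaded: the rewritten graph is typically converted back
# to a Program and saved with the standard inference-model API.
int8_program = graph.to_program()
exe = static.Executor(place)
static.save_inference_model("/PATH/TO/SAVE/INT8/MODEL/model", feed_vars,
                            fetch_targets, executor=exe, program=int8_program)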
......@@ -263,7 +264,7 @@ The following options are also accepted:
```bash
cd /PATH/TO/PADDLE
OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/fluid/contrib/slim/tests/quant2_int8_image_classification_comparison.py --quant_model=/PATH/TO/DOWNLOADED/QUANT/MODEL --fp32_model=/PATH/TO/DOWNLOADED/FP32/MODEL --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.01 --ops_to_quantize="conv2d,pool2d"
OMP_NUM_THREADS=28 FLAGS_use_mkldnn=true python python/paddle/static/quantization/slim/tests/quant2_int8_image_classification_comparison.py --quant_model=/PATH/TO/DOWNLOADED/QUANT/MODEL --fp32_model=/PATH/TO/DOWNLOADED/FP32/MODEL --infer_data=$HOME/.cache/paddle/dataset/int8/download/int8_full_val.bin --batch_size=50 --batch_num=1000 --acc_diff_threshold=0.01 --ops_to_quantize="conv2d,pool2d"
```
> Notes: Due to the large number of images in the `int8_full_val.bin` dataset (50 000), the accuracy benchmark may take a long time. To speed up accuracy measurement, it is recommended to set `OMP_NUM_THREADS` to the maximum number of physical cores available on the server.
......@@ -276,7 +277,7 @@ To reproduce the performance results, the environment variable `OMP_NUM_THREADS=
```bash
cd /PATH/TO/PADDLE/build
python ../python/paddle/fluid/contrib/slim/tests/save_quant_model.py --quant_model_path=/PATH/TO/DOWNLOADED/QUANT/MODEL --int8_model_save_path=/PATH/TO/SAVE/QUANT/INT8/MODEL --ops_to_quantize="conv2d,pool2d"
python ../python/paddle/static/quantization/slim/tests/save_quant_model.py --quant_model_path=/PATH/TO/DOWNLOADED/QUANT/MODEL --int8_model_save_path=/PATH/TO/SAVE/QUANT/INT8/MODEL --ops_to_quantize="conv2d,pool2d"
```
2. Run the C-API test for performance benchmark.
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......
# copyright (c) 2020 paddlepaddle authors. all rights reserved.
# copyright (c) 2022 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
......@@ -12,14 +12,14 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import argparse
import os
import sys
import argparse
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
import unittest
import paddle
from paddle.fluid.framework import IrGraph
from paddle.framework import core
paddle.enable_static()
......@@ -47,29 +47,32 @@ def parse_args():
def generate_dot_for_model(model_path, save_graph_dir, save_graph_name):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
inference_scope = paddle.static.global_scope()
with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')):
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(model_path, exe)
] = paddle.fluid.io.load_inference_model(model_path, exe)
else:
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
model_path, exe, 'model', 'params'
] = paddle.static.load_inference_model(
model_path,
exe,
model_filename='model',
params_filename='params',
)
graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
if not os.path.exists(save_graph_dir):
os.makedirs(save_graph_dir)
model_name = os.path.basename(os.path.normpath(save_graph_dir))
if save_graph_name is '':
if save_graph_name == '':
save_graph_name = model_name
graph.draw(save_graph_dir, save_graph_name, graph.all_op_nodes())
print(
......
......@@ -11,18 +11,27 @@
# without warranties or conditions of any kind, either express or implied.
# see the license for the specific language governing permissions and
# limitations under the license.
import numpy as np
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.nn import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.nn import BatchNorm1D
import numpy as np
from paddle.fluid.log_helper import get_logger
import paddle
from paddle.framework import ParamAttr
from paddle.nn import (
BatchNorm1D,
BatchNorm2D,
Conv2D,
LeakyReLU,
Linear,
MaxPool2D,
PReLU,
ReLU,
ReLU6,
Sequential,
Sigmoid,
Softmax,
)
from paddle.static.log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
......@@ -86,18 +95,18 @@ def train_lenet(lenet, reader, optimizer):
return loss_list
class ImperativeLenet(fluid.dygraph.Layer):
class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10):
super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv2d_w1_attr = ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = ParamAttr(name="conv2d_w_2")
fc_w1_attr = ParamAttr(name="fc_w_1")
fc_w2_attr = ParamAttr(name="fc_w_2")
fc_w3_attr = ParamAttr(name="fc_w_3")
conv2d_b2_attr = ParamAttr(name="conv2d_b_2")
fc_b1_attr = ParamAttr(name="fc_b_1")
fc_b2_attr = ParamAttr(name="fc_b_2")
fc_b3_attr = ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
......@@ -155,26 +164,26 @@ class ImperativeLenet(fluid.dygraph.Layer):
x = self.quant_stub(inputs)
x = self.features(x)
x = paddle.flatten(x, 1, -1)
x = paddle.flatten(x, 1)
x = self.add(x, paddle.to_tensor(0.0)) # For CI
x = self.fc(x)
return x
class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
class ImperativeLenetWithSkipQuant(paddle.nn.Layer):
def __init__(self, num_classes=10):
super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv2d_w1_attr = ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = ParamAttr(name="conv2d_w_2")
fc_w1_attr = ParamAttr(name="fc_w_1")
fc_w2_attr = ParamAttr(name="fc_w_2")
fc_w3_attr = ParamAttr(name="fc_w_3")
conv2d_b1_attr = ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = ParamAttr(name="conv2d_b_2")
fc_b1_attr = ParamAttr(name="fc_b_1")
fc_b2_attr = ParamAttr(name="fc_b_2")
fc_b3_attr = ParamAttr(name="fc_b_3")
self.conv2d_0 = Conv2D(
in_channels=1,
out_channels=6,
......@@ -240,8 +249,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
x = self.relu6_0(x)
x = self.pool2d_1(x)
x = paddle.flatten(x, 1, -1)
x = paddle.flatten(x, 1)
x = self.linear_0(x)
x = self.leaky_relu_0(x)
x = self.linear_1(x)
......@@ -252,7 +260,7 @@ class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer):
return x
class ImperativeLinearBn(fluid.dygraph.Layer):
class ImperativeLinearBn(paddle.nn.Layer):
def __init__(self):
super().__init__()
......@@ -284,7 +292,7 @@ class ImperativeLinearBn(fluid.dygraph.Layer):
return x
class ImperativeLinearBn_hook(fluid.dygraph.Layer):
class ImperativeLinearBn_hook(paddle.nn.Layer):
def __init__(self):
super().__init__()
......
......@@ -12,19 +12,20 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import os
import sys
import argparse
import logging
import os
import struct
import numpy as np
import sys
import time
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass
from paddle.fluid import core
from paddle.framework import core
from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static()
......@@ -185,23 +186,26 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase):
target='quant',
):
assert target in ['quant', 'int8', 'fp32']
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
inference_scope = paddle.static.global_scope()
with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')):
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(model_path, exe)
] = paddle.fluid.io.load_inference_model(model_path, exe)
else:
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
model_path, exe, 'model', 'params'
] = paddle.static.load_inference_model(
model_path,
exe,
model_filename='model',
params_filename='params',
)
graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
......@@ -359,7 +363,7 @@ class Quant2Int8ImageClassificationComparisonTest(unittest.TestCase):
return set(map(int, string.split(',')))
def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn():
if not core.is_compiled_with_mkldnn():
return
quant_model_path = test_case_args.quant_model
......
......@@ -13,15 +13,17 @@
# limitations under the License.
import argparse
import numpy as np
import struct
import sys
import time
import unittest
from paddle import fluid
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor
import numpy as np
from save_quant_model import transform_and_save_int8_model
import paddle
from paddle.framework import core
def parse_args():
parser = argparse.ArgumentParser()
......@@ -80,17 +82,19 @@ class TestLstmModelPTQ(unittest.TestCase):
[len(feat) // 4 // 8, 8]
)
lod_feat = [feat.shape[0]]
minputs = fluid.create_lod_tensor(feat, [lod_feat], place)
minputs = paddle.fluid.create_lod_tensor(
feat, [lod_feat], place
)
infer_data = fluid.core.PaddleTensor()
infer_data = core.PaddleTensor()
infer_data.lod = minputs.lod()
infer_data.data = fluid.core.PaddleBuf(np.array(minputs))
infer_data.data = core.PaddleBuf(np.array(minputs))
infer_data.shape = minputs.shape()
infer_data.dtype = fluid.core.PaddleDType.FLOAT32
infer_label = fluid.core.PaddleTensor()
infer_label.data = fluid.core.PaddleBuf(np.array(label))
infer_data.dtype = core.PaddleDType.FLOAT32
infer_label = core.PaddleTensor()
infer_label.data = core.PaddleBuf(np.array(label))
infer_label.shape = label.shape
infer_label.dtype = fluid.core.PaddleDType.INT32
infer_label.dtype = core.PaddleDType.INT32
data.append([infer_data, infer_label])
warmup_data = data[:1]
inputs = data[1:]
......@@ -105,7 +109,7 @@ class TestLstmModelPTQ(unittest.TestCase):
use_analysis=False,
enable_ptq=False,
):
config = AnalysisConfig(model_path)
config = core.AnalysisConfig(model_path)
config.set_cpu_math_library_num_threads(num_threads)
if use_analysis:
config.disable_gpu()
......@@ -132,7 +136,7 @@ class TestLstmModelPTQ(unittest.TestCase):
use_analysis=False,
enable_ptq=False,
):
place = fluid.CPUPlace()
place = paddle.CPUPlace()
warmup_data, inputs = self.get_warmup_tensor(data_path, place)
warmup_data = [item[0] for item in warmup_data]
config = self.set_config(
......@@ -144,7 +148,7 @@ class TestLstmModelPTQ(unittest.TestCase):
enable_ptq,
)
predictor = create_paddle_predictor(config)
predictor = core.create_paddle_predictor(config)
data = [item[0] for item in inputs]
label = np.array([item[1] for item in inputs])
......@@ -197,7 +201,7 @@ class TestLstmModelPTQ(unittest.TestCase):
return hx_acc, ctc_acc, fps
def test_lstm_model(self):
if not fluid.core.is_compiled_with_mkldnn():
if not core.is_compiled_with_mkldnn():
return
fp32_model = test_case_args.fp32_model
......
......@@ -12,18 +12,19 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import os
import sys
import argparse
import logging
import numpy as np
import os
import sys
import time
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass
from paddle.fluid import core
from paddle.framework import core
from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static()
......@@ -158,23 +159,26 @@ class QuantInt8NLPComparisonTest(unittest.TestCase):
target='quant',
):
assert target in ['quant', 'int8', 'fp32']
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
inference_scope = paddle.static.global_scope()
with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')):
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(model_path, exe)
] = paddle.fluid.io.load_inference_model(model_path, exe)
else:
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
model_path, exe, 'model', 'params'
] = paddle.static.load_inference_model(
model_path,
exe,
model_filename='model',
params_filename='params',
)
graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
......@@ -296,7 +300,7 @@ class QuantInt8NLPComparisonTest(unittest.TestCase):
return set(map(int, string.split(',')))
def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn():
if not core.is_compiled_with_mkldnn():
return
quant_model_path = test_case_args.quant_model
......
......@@ -12,19 +12,20 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import os
import sys
import argparse
import logging
import os
import struct
import numpy as np
import sys
import time
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantInt8MkldnnPass
from paddle.fluid import core
from paddle.framework import core
from paddle.static.quantization import QuantInt8MkldnnPass
paddle.enable_static()
......@@ -163,23 +164,26 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase):
skip_batch_num=0,
transform_to_int8=False,
):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
inference_scope = paddle.static.global_scope()
with paddle.static.scope_guard(inference_scope):
if os.path.exists(os.path.join(model_path, '__model__')):
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(model_path, exe)
] = paddle.fluid.io.load_inference_model(model_path, exe)
else:
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
model_path, exe, 'model', 'params'
] = paddle.static.load_inference_model(
model_path,
exe,
model_filename='model',
params_filename='params',
)
graph = IrGraph(core.Graph(inference_program.desc), for_test=True)
......@@ -298,7 +302,7 @@ class QuantInt8ImageClassificationComparisonTest(unittest.TestCase):
assert fp32_acc1 - int8_acc1 <= threshold
def test_graph_transformation(self):
if not fluid.core.is_compiled_with_mkldnn():
if not core.is_compiled_with_mkldnn():
return
quant_model_path = test_case_args.quant_model
......
......@@ -12,15 +12,15 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import unittest
import argparse
import os
import sys
import argparse
import unittest
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import Quant2Int8MkldnnPass
from paddle.fluid import core
from paddle.framework import core
from paddle.static.quantization import Quant2Int8MkldnnPass
paddle.enable_static()
......@@ -93,35 +93,41 @@ def transform_and_save_int8_model(
debug=False,
quant_model_filename='',
quant_params_filename='',
save_model_filename="__model__",
save_model_filename="model",
save_params_filename=None,
):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
with fluid.scope_guard(inference_scope):
place = paddle.CPUPlace()
exe = paddle.static.Executor(place)
inference_scope = paddle.static.global_scope()
with paddle.static.scope_guard(inference_scope):
if not quant_model_filename:
if os.path.exists(os.path.join(original_path, '__model__')):
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(original_path, exe)
] = paddle.fluid.io.load_inference_model(original_path, exe)
else:
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
original_path, exe, 'model', 'params'
] = paddle.static.load_inference_model(
original_path,
exe,
model_filename='model',
params_filename='params',
)
else:
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
original_path, exe, quant_model_filename, quant_params_filename
] = paddle.static.load_inference_model(
original_path,
exe,
model_filename=quant_model_filename,
params_filename=quant_params_filename,
)
ops_to_quantize_set = set()
......@@ -147,15 +153,18 @@ def transform_and_save_int8_model(
)
graph = transform_to_mkldnn_int8_pass.apply(graph)
inference_program = graph.to_program()
with fluid.scope_guard(inference_scope):
fluid.io.save_inference_model(
save_path,
feed_target_names,
with paddle.static.scope_guard(inference_scope):
path_prefix = os.path.join(save_path, save_model_filename)
feed_vars = [
inference_program.global_block().var(name)
for name in feed_target_names
]
paddle.static.save_inference_model(
path_prefix,
feed_vars,
fetch_targets,
exe,
inference_program,
model_filename=save_model_filename,
params_filename=save_params_filename,
executor=exe,
program=inference_program,
)
print(
"Success! INT8 model obtained from the Quant model can be found at {}\n".format(
......
......@@ -13,12 +13,13 @@
# limitations under the license.
import os
import numpy as np
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
from paddle.framework import core
paddle.enable_static()
......@@ -27,63 +28,68 @@ os.environ["CPU_NUM"] = "1"
def conv_block():
img = fluid.layers.data(name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
conv_pool_1 = fluid.nets.simple_img_conv_pool(
img = paddle.static.data(
name='image', shape=[-1, 1, 28, 28], dtype='float32'
)
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
conv_out_1 = paddle.static.nn.conv2d(
input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu",
act='relu',
)
conv_pool_1 = paddle.nn.functional.max_pool2d(
conv_out_1, kernel_size=2, stride=2
)
conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
conv_out_2 = paddle.static.nn.conv2d(
input=conv_pool_1,
filter_size=5,
num_filters=50,
pool_size=2,
pool_stride=2,
act="relu",
num_filters=20,
act='relu',
)
conv_pool_2 = paddle.nn.functional.max_pool2d(
conv_out_2, kernel_size=2, stride=2
)
prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
loss = paddle.nn.functional.cross_entropy(
input=prediction, label=label, reduction='none', use_softmax=False
prediction = paddle.static.nn.fc(
x=conv_pool_2, size=10, activation='softmax'
)
loss = paddle.nn.functional.cross_entropy(input=prediction, label=label)
avg_loss = paddle.mean(loss)
return [img, label], avg_loss
class TestGraph(unittest.TestCase):
def graph_apis(self, use_cuda=False, for_ci=True):
main = fluid.Program()
startup = fluid.Program()
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main = paddle.static.Program()
startup = paddle.static.Program()
with paddle.utils.unique_name.guard():
with paddle.static.program_guard(main, startup):
feeds, loss = conv_block()
opt = fluid.optimizer.Adam(learning_rate=0.001)
opt = paddle.optimizer.Adam(learning_rate=0.001)
opt.minimize(loss)
graph = IrGraph(core.Graph(main.desc), for_test=False)
backup_graph = graph.clone()
self.assertEqual(len(graph.all_nodes()), len(backup_graph.all_nodes()))
build_strategy = fluid.BuildStrategy()
build_strategy = paddle.static.BuildStrategy()
build_strategy.memory_optimize = False
build_strategy.enable_inplace = False
origin_binary = fluid.CompiledProgram(graph.graph).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy
)
backup_binary = fluid.CompiledProgram(
origin_binary = paddle.static.CompiledProgram(
graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
backup_binary = paddle.static.CompiledProgram(
backup_graph.graph
).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup)
iters = 5
batch_size = 8
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=batch_size
)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
feeder = paddle.fluid.DataFeeder(feed_list=feeds, place=place)
def _train(binary):
for _ in range(iters):
......@@ -105,17 +111,29 @@ class TestGraph(unittest.TestCase):
var.set(var_array, place)
sum_before = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor())
np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
)
fluid.io._save_persistable_nodes(exe, checkponit_dir, graph)
_set_zero('conv2d_1.w_0', fluid.global_scope(), place)
)
paddle.fluid.io._save_persistable_nodes(exe, checkponit_dir, graph)
_set_zero('conv2d_1.w_0', paddle.static.global_scope(), place)
set_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor())
np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
)
)
self.assertEqual(set_after, 0)
fluid.io._load_persistable_nodes(exe, checkponit_dir, graph)
paddle.fluid.io._load_persistable_nodes(exe, checkponit_dir, graph)
sum_after = np.sum(
np.array(fluid.global_scope().find_var('conv2d_1.w_0').get_tensor())
np.array(
paddle.static.global_scope()
.find_var('conv2d_1.w_0')
.get_tensor()
)
)
self.assertEqual(sum_before, sum_after)
......@@ -144,7 +162,7 @@ class TestGraph(unittest.TestCase):
self.graph_apis(use_cuda=False, for_ci=True)
def test_graph_apis_cuda(self):
if fluid.core.is_compiled_with_cuda():
if core.is_compiled_with_cuda():
self.graph_apis(use_cuda=True, for_ci=True)
......
......@@ -13,38 +13,31 @@
# limitations under the license.
import os
import numpy as np
import random
import unittest
import logging
import warnings
import tempfile
import unittest
import numpy as np
from imperative_test_utils import fix_model_dict, train_lenet
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.nn import Sequential
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.fluid.dygraph import nn
from imperative_test_utils import fix_model_dict, train_lenet
from paddle.framework import core, set_flags
from paddle.nn import (
BatchNorm2D,
Conv2D,
Linear,
MaxPool2D,
Sequential,
Softmax,
)
from paddle.nn.layer import LeakyReLU, PReLU, ReLU, Sigmoid
from paddle.quantization import ImperativeQuantAware
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
set_flags({"FLAGS_cudnn_deterministic": True})
def get_vaild_warning_num(warning, w):
......@@ -55,18 +48,18 @@ def get_vaild_warning_num(warning, w):
return num
class ImperativeLenet(fluid.dygraph.Layer):
class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10):
super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
......@@ -121,7 +114,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
def forward(self, inputs):
x = self.features(inputs)
x = paddle.flatten(x, 1, -1)
x = paddle.flatten(x, 1)
x = self.fc(x)
return x
......@@ -152,8 +145,8 @@ class TestImperativeOutSclae(unittest.TestCase):
with fluid.dygraph.guard():
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
paddle.static.default_main_program().random_seed = seed
paddle.static.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
lenet = fix_model_dict(lenet)
......@@ -162,8 +155,8 @@ class TestImperativeOutSclae(unittest.TestCase):
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True
)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters()
adam = paddle.optimizer.Adam(
learning_rate=lr, parameters=lenet.parameters()
)
loss_list = train_lenet(lenet, reader, adam)
lenet.eval()
......@@ -186,8 +179,8 @@ class TestImperativeOutSclae(unittest.TestCase):
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True
)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters()
adam = paddle.optimizer.Adam(
learning_rate=lr, parameters=lenet.parameters()
)
loss_list = train_lenet(lenet, reader, adam)
lenet.eval()
......
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
# copyright (c) 2022 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
......@@ -12,29 +12,32 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import logging
import os
import numpy as np
import random
import shutil
import tempfile
import time
import unittest
import copy
import logging
import tempfile
import paddle.nn as nn
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import *
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download
import numpy as np
from imperative_test_utils import (
fix_model_dict,
ImperativeLenet,
ImperativeLinearBn,
ImperativeLinearBn_hook,
)
from imperative_test_utils import ImperativeLinearBn_hook
import paddle
import paddle.nn as nn
from paddle.dataset.common import download
from paddle.fluid.framework import _test_eager_guard
from paddle.quantization import (
AbsmaxQuantizer,
HistQuantizer,
ImperativePTQ,
KLQuantizer,
PerChannelAbsmaxQuantizer,
PTQConfig,
)
from paddle.static.log_helper import get_logger
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
......@@ -149,8 +152,8 @@ class TestImperativePTQ(unittest.TestCase):
label = paddle.to_tensor(y_data)
out = model(img)
acc_top1 = paddle.static.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.static.accuracy(input=out, label=label, k=5)
acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
eval_acc_top1_list.append(float(acc_top1.numpy()))
if batch_id % 50 == 0:
......@@ -207,7 +210,7 @@ class TestImperativePTQ(unittest.TestCase):
break
return top1_correct_num / total_num
def test_ptq(self):
def func_ptq(self):
start_time = time.time()
self.set_vars()
......@@ -265,9 +268,14 @@ class TestImperativePTQ(unittest.TestCase):
end_time = time.time()
print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQfuse(TestImperativePTQ):
def test_ptq(self):
def func_ptq(self):
start_time = time.time()
self.set_vars()
......@@ -336,6 +344,11 @@ class TestImperativePTQfuse(TestImperativePTQ):
end_time = time.time()
print("total time: %ss \n" % (end_time - start_time))
def test_ptq(self):
with _test_eager_guard():
self.func_ptq()
self.func_ptq()
class TestImperativePTQHist(TestImperativePTQ):
def set_vars(self):
......
......@@ -12,34 +12,34 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import logging
import os
import numpy as np
import random
import time
import tempfile
import unittest
import logging
import numpy as np
from imperative_test_utils import ImperativeLenet, fix_model_dict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.nn import Sequential
from paddle.nn import Linear, Conv2D, Softmax, Conv2DTranspose
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.framework import core, set_flags
from paddle.nn import Conv2D, Conv2DTranspose
from paddle.nn.quant.quant_layers import (
QuantizedConv2D,
QuantizedConv2DTranspose,
)
from imperative_test_utils import fix_model_dict, ImperativeLenet
from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
......@@ -84,7 +84,7 @@ class TestImperativeQat(unittest.TestCase):
)
quant_conv1 = QuantizedConv2D(conv1)
data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
quant_conv1(fluid.dygraph.to_variable(data))
quant_conv1(paddle.to_tensor(data))
conv_transpose = Conv2DTranspose(4, 6, (3, 3))
quant_conv_transpose = QuantizedConv2DTranspose(conv_transpose)
......@@ -95,15 +95,13 @@ class TestImperativeQat(unittest.TestCase):
seed = 1
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
paddle.static.default_main_program().random_seed = seed
paddle.static.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=0.001, parameter_list=lenet.parameters()
)
adam = Adam(learning_rate=0.001, parameters=lenet.parameters())
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True
......@@ -125,10 +123,10 @@ class TestImperativeQat(unittest.TestCase):
.reshape(-1, 1)
)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
acc = paddle.static.accuracy(out, label)
acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False
)
......@@ -157,14 +155,14 @@ class TestImperativeQat(unittest.TestCase):
.reshape(-1, 1)
)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
acc_top1 = paddle.static.accuracy(
acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1
)
acc_top5 = paddle.static.accuracy(
acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5
)
......@@ -197,11 +195,11 @@ class TestImperativeQat(unittest.TestCase):
y_data = (
np.array([x[1] for x in data]).astype('int64').reshape(-1, 1)
)
test_img = fluid.dygraph.to_variable(test_data)
label = fluid.dygraph.to_variable(y_data)
test_img = paddle.to_tensor(test_data)
label = paddle.to_tensor(y_data)
lenet.eval()
fp32_out = lenet(test_img)
fp32_acc = paddle.static.accuracy(fp32_out, label).numpy()
fp32_acc = paddle.metric.accuracy(fp32_out, label).numpy()
with tempfile.TemporaryDirectory(prefix="qat_save_path_") as tmpdir:
# save inference quantized model
......@@ -220,13 +218,13 @@ class TestImperativeQat(unittest.TestCase):
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
exe = paddle.static.Executor(place)
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
dirname=tmpdir,
] = paddle.static.load_inference_model(
tmpdir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX,
......@@ -237,8 +235,8 @@ class TestImperativeQat(unittest.TestCase):
fetch_list=fetch_targets,
)
paddle.disable_static()
quant_out = fluid.dygraph.to_variable(quant_out)
quant_acc = paddle.static.accuracy(quant_out, label).numpy()
quant_out = paddle.to_tensor(quant_out)
quant_acc = paddle.metric.accuracy(quant_out, label).numpy()
paddle.enable_static()
delta_value = fp32_acc - quant_acc
self.assertLessEqual(delta_value, self.diff_threshold)
......
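The hunks above apply one recurring dygraph migration: fluid.dygraph.to_variable becomes paddle.to_tensor, paddle.static.accuracy becomes paddle.metric.accuracy, and AdamOptimizer(parameter_list=...) becomes paddle.optimizer.Adam(parameters=...). For orientation, a minimal sketch of a single training step written against the new API is shown below; the stand-in model, batch shapes, and variable names are illustrative and not part of this change.

    import numpy as np
    import paddle
    from paddle.optimizer import Adam

    # Illustrative stand-in for the LeNet used by the test.
    model = paddle.nn.Sequential(
        paddle.nn.Flatten(),
        paddle.nn.Linear(784, 10),
        paddle.nn.Softmax(),
    )
    adam = Adam(learning_rate=0.001, parameters=model.parameters())

    x_data = np.random.uniform(-1, 1, [32, 1, 28, 28]).astype('float32')
    y_data = np.random.randint(0, 10, [32, 1]).astype('int64')

    img = paddle.to_tensor(x_data)    # replaces fluid.dygraph.to_variable(x_data)
    label = paddle.to_tensor(y_data)

    out = model(img)
    acc = paddle.metric.accuracy(out, label)  # replaces paddle.static.accuracy
    loss = paddle.nn.functional.cross_entropy(
        out, label, reduction='none', use_softmax=False
    )
    avg_loss = paddle.mean(loss)
    avg_loss.backward()
    adam.step()
    adam.clear_grad()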
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
# copyright (c) 2022 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
......@@ -12,25 +12,25 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import logging
import os
import numpy as np
import random
import shutil
import tempfile
import time
import unittest
import logging
import tempfile
import numpy as np
from imperative_test_utils import ImperativeLenet
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download
from imperative_test_utils import fix_model_dict, ImperativeLenet
from paddle.framework import set_flags
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
os.environ["CPU_NUM"] = "1"
if paddle.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
......@@ -117,7 +117,7 @@ class TestImperativeQatAmp(unittest.TestCase):
if use_amp:
with paddle.amp.auto_cast():
out = model(img)
acc = paddle.static.accuracy(out, label)
acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False
)
......@@ -129,7 +129,7 @@ class TestImperativeQatAmp(unittest.TestCase):
adam.clear_gradients()
else:
out = model(img)
acc = paddle.static.accuracy(out, label)
acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False
)
......@@ -170,8 +170,8 @@ class TestImperativeQatAmp(unittest.TestCase):
with paddle.amp.auto_cast(use_amp):
out = model(img)
acc_top1 = paddle.static.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.static.accuracy(input=out, label=label, k=5)
acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
acc_top1_list.append(float(acc_top1.numpy()))
if batch_id % 100 == 0:
......
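The AMP variant of the test keeps its control flow and only swaps the accuracy helper; the pattern it exercises is a forward pass under paddle.amp.auto_cast() followed by paddle.metric.accuracy. A minimal sketch under that assumption, with a placeholder model and random data (the cast before the metric just keeps it in fp32):

    import paddle

    model = paddle.nn.Linear(784, 10)            # placeholder model
    img = paddle.randn([4, 784])
    label = paddle.randint(0, 10, [4, 1])

    with paddle.amp.auto_cast():                 # mixed-precision forward pass
        out = model(img)
    acc = paddle.metric.accuracy(out.astype('float32'), label)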
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
# copyright (c) 2022 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
......@@ -13,27 +13,18 @@
# limitations under the license.
import os
import numpy as np
import random
import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.log_helper import get_logger
from test_imperative_qat import TestImperativeQat
import paddle
from paddle.framework import core, set_flags
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
set_flags({"FLAGS_cudnn_deterministic": True})
class TestImperativeQatChannelWise(TestImperativeQat):
......
......@@ -13,27 +13,18 @@
# limitations under the license.
import os
import numpy as np
import random
import unittest
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.log_helper import get_logger
from test_imperative_qat import TestImperativeQat
import paddle
from paddle.framework import core, set_flags
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
set_flags({"FLAGS_cudnn_deterministic": True})
class TestImperativeQatfuseBN(TestImperativeQat):
......
......@@ -12,57 +12,53 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import logging
import os
import numpy as np
import random
import time
import tempfile
import unittest
import logging
import numpy as np
from imperative_test_utils import fix_model_dict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
SGDOptimizer,
AdamOptimizer,
MomentumOptimizer,
from paddle.framework import core, set_flags
from paddle.nn import (
BatchNorm2D,
Conv2D,
LeakyReLU,
Linear,
MaxPool2D,
PReLU,
ReLU,
Sequential,
Sigmoid,
Softmax,
)
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.nn import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import (
QuantizedConv2D,
QuantizedConv2DTranspose,
)
from imperative_test_utils import fix_model_dict
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class ImperativeLenet(fluid.dygraph.Layer):
class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10):
super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
......@@ -116,7 +112,7 @@ class ImperativeLenet(fluid.dygraph.Layer):
def forward(self, inputs):
x = self.features(inputs)
x = paddle.flatten(x, 1, -1)
x = paddle.flatten(x, 1)
x = self.fc(x)
return x
......@@ -139,14 +135,14 @@ class TestImperativeQatLSQ(unittest.TestCase):
seed = 100
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
paddle.static.default_main_program().random_seed = seed
paddle.static.default_startup_program().random_seed = seed
paddle.disable_static()
lenet = ImperativeLenet()
lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet)
optimizer = MomentumOptimizer(
learning_rate=0.1, parameter_list=lenet.parameters(), momentum=0.9
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1, parameters=lenet.parameters(), momentum=0.9
)
train_reader = paddle.batch(
......@@ -166,10 +162,10 @@ class TestImperativeQatLSQ(unittest.TestCase):
.reshape(-1, 1)
)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
acc = paddle.static.accuracy(out, label)
acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False
)
......@@ -199,14 +195,14 @@ class TestImperativeQatLSQ(unittest.TestCase):
.astype('int64')
.reshape(-1, 1)
)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
acc_top1 = paddle.static.accuracy(
acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1
)
acc_top5 = paddle.static.accuracy(
acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5
)
......
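In the LSQ test the model definition itself migrates: fluid.dygraph.Layer becomes paddle.nn.Layer, fluid.ParamAttr becomes paddle.ParamAttr, and paddle.flatten(x, 1, -1) is shortened to paddle.flatten(x, 1). A reduced sketch of the same pattern follows; the layer sizes and attribute names are made up for illustration.

    import paddle
    from paddle.nn import Conv2D, Linear

    class TinyNet(paddle.nn.Layer):              # was fluid.dygraph.Layer
        def __init__(self, num_classes=10):
            super().__init__()
            # Named parameter attributes now come from paddle.ParamAttr.
            conv_w_attr = paddle.ParamAttr(name="tiny_conv_w")
            fc_w_attr = paddle.ParamAttr(name="tiny_fc_w")
            self.conv = Conv2D(
                in_channels=1,
                out_channels=6,
                kernel_size=3,
                padding=1,
                weight_attr=conv_w_attr,
            )
            self.fc = Linear(6 * 28 * 28, num_classes, weight_attr=fc_w_attr)

        def forward(self, x):
            x = self.conv(x)
            x = paddle.flatten(x, 1)             # was paddle.flatten(x, 1, -1)
            return self.fc(x)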
......@@ -12,57 +12,55 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import logging
import os
import numpy as np
import random
import time
import tempfile
import unittest
import logging
import numpy as np
from imperative_test_utils import fix_model_dict
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.optimizer import (
SGDOptimizer,
AdamOptimizer,
MomentumOptimizer,
from paddle.framework import core, set_flags
from paddle.nn import (
BatchNorm2D,
Conv2D,
LeakyReLU,
Linear,
MaxPool2D,
PReLU,
ReLU,
Sequential,
Sigmoid,
Softmax,
)
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.nn import Sequential
from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D
from paddle.fluid.log_helper import get_logger
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.quant.quant_layers import (
QuantizedConv2D,
QuantizedMatmul,
)
from imperative_test_utils import fix_model_dict
from paddle.nn.quant.quant_layers import QuantizedMatmul
from paddle.optimizer import Momentum
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
class ImperativeLenet(fluid.dygraph.Layer):
class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10):
super().__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv2d_w1_attr = paddle.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = paddle.ParamAttr(name="conv2d_w_2")
fc_w1_attr = paddle.ParamAttr(name="fc_w_1")
fc_w2_attr = paddle.ParamAttr(name="fc_w_2")
fc_w3_attr = paddle.ParamAttr(name="fc_w_3")
conv2d_b2_attr = paddle.ParamAttr(name="conv2d_b_2")
fc_b1_attr = paddle.ParamAttr(name="fc_b_1")
fc_b2_attr = paddle.ParamAttr(name="fc_b_2")
fc_b3_attr = paddle.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
in_channels=1,
......@@ -140,15 +138,15 @@ class TestImperativeQatMatmul(unittest.TestCase):
seed = 100
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
paddle.static.default_main_program().random_seed = seed
paddle.static.default_startup_program().random_seed = seed
paddle.disable_static()
lenet = ImperativeLenet()
lenet = fix_model_dict(lenet)
imperative_qat.quantize(lenet)
optimizer = MomentumOptimizer(
learning_rate=0.1, parameter_list=lenet.parameters(), momentum=0.9
optimizer = Momentum(
learning_rate=0.1, parameters=lenet.parameters(), momentum=0.9
)
train_reader = paddle.batch(
......@@ -168,18 +166,18 @@ class TestImperativeQatMatmul(unittest.TestCase):
.reshape(-1, 1)
)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
acc = paddle.static.accuracy(out, label)
acc = paddle.metric.accuracy(out, label)
loss = paddle.nn.functional.cross_entropy(
out, label, reduction='none', use_softmax=False
)
avg_loss = paddle.mean(loss)
avg_loss.backward()
optimizer.minimize(avg_loss)
lenet.clear_gradients()
optimizer.step()
optimizer.clear_grad()
if batch_id % 100 == 0:
_logger.info(
......@@ -201,14 +199,14 @@ class TestImperativeQatMatmul(unittest.TestCase):
.astype('int64')
.reshape(-1, 1)
)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
img = paddle.to_tensor(x_data)
label = paddle.to_tensor(y_data)
out = lenet(img)
acc_top1 = paddle.static.accuracy(
acc_top1 = paddle.metric.accuracy(
input=out, label=label, k=1
)
acc_top5 = paddle.static.accuracy(
acc_top5 = paddle.metric.accuracy(
input=out, label=label, k=5
)
......
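Besides renaming MomentumOptimizer(parameter_list=...) to paddle.optimizer.Momentum(parameters=...), the matmul test above switches its update step from optimizer.minimize(avg_loss) plus lenet.clear_gradients() to the 2.x dygraph idiom optimizer.step() plus optimizer.clear_grad(). A minimal sketch of that loop body, with a placeholder model and loss:

    import paddle
    from paddle.optimizer import Momentum

    model = paddle.nn.Linear(16, 4)              # placeholder model
    optimizer = Momentum(
        learning_rate=0.1, parameters=model.parameters(), momentum=0.9
    )

    x = paddle.randn([8, 16])
    loss = paddle.mean(model(x))

    loss.backward()
    optimizer.step()        # replaces optimizer.minimize(avg_loss)
    optimizer.clear_grad()  # replaces lenet.clear_gradients()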
......@@ -12,20 +12,19 @@
# see the license for the specific language governing permissions and
# limitations under the license.
import logging
import os
import numpy as np
import random
import unittest
import logging
import numpy as np
import paddle
import paddle.nn as nn
from paddle.optimizer import Adam
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.framework import _test_eager_guard
from paddle.nn import Sequential
from paddle.nn import Linear
from paddle.nn.quant.quant_layers import QuantizedConv2DTranspose
from paddle.fluid.log_helper import get_logger
from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware
from paddle.static.log_helper import get_logger
os.environ["CPU_NUM"] = "1"
......@@ -110,7 +109,7 @@ class ModelForConv2dT(nn.Layer):
def __init__(self, num_classes=10):
super().__init__()
self.features = nn.Conv2DTranspose(4, 6, (3, 3))
self.fc = Linear(600, num_classes)
self.fc = nn.Linear(in_features=600, out_features=num_classes)
def forward(self, inputs):
x = self.features(inputs)
......@@ -123,28 +122,28 @@ class ImperativeLenet(paddle.nn.Layer):
def __init__(self, num_classes=10, classifier_activation='softmax'):
super().__init__()
self.features = Sequential(
paddle.nn.Conv2D(
nn.Conv2D(
in_channels=1,
out_channels=6,
kernel_size=3,
stride=1,
padding=1,
),
paddle.nn.MaxPool2D(kernel_size=2, stride=2),
paddle.nn.Conv2D(
nn.MaxPool2D(kernel_size=2, stride=2),
nn.Conv2D(
in_channels=6,
out_channels=16,
kernel_size=5,
stride=1,
padding=0,
),
paddle.nn.MaxPool2D(kernel_size=2, stride=2),
nn.MaxPool2D(kernel_size=2, stride=2),
)
self.fc = Sequential(
Linear(400, 120),
Linear(120, 84),
Linear(84, num_classes),
nn.Linear(in_features=400, out_features=120),
nn.Linear(in_features=120, out_features=84),
nn.Linear(in_features=84, out_features=num_classes),
)
def forward(self, inputs):
......@@ -160,7 +159,7 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
_logger.info("test act_preprocess")
self.imperative_qat = ImperativeQuantAware(act_preprocess_layer=PACT)
def test_quant_aware_training(self):
def func_quant_aware_training(self):
imperative_qat = self.imperative_qat
seed = 1
np.random.seed(seed)
......@@ -170,8 +169,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
fixed_state = {}
param_init_map = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
p_shape = np.array(param).shape
p_value = np.array(param)
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
......@@ -217,8 +216,8 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
loss = nn.functional.loss.cross_entropy(out, label)
avg_loss = paddle.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
model.clear_gradients()
adam.step()
adam.clear_grad()
if batch_id % 50 == 0:
_logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".format(
......@@ -262,6 +261,11 @@ class TestUserDefinedActPreprocess(unittest.TestCase):
train(lenet)
test(lenet)
def test_quant_aware_training(self):
with _test_eager_guard():
self.func_quant_aware_training()
self.func_quant_aware_training()
class TestUserDefinedWeightPreprocess(TestUserDefinedActPreprocess):
def setUp(self):
......
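The user-defined preprocess test renames test_quant_aware_training to func_quant_aware_training and re-exposes it through a wrapper that runs it once under _test_eager_guard() and once without, so the case is exercised in both eager and legacy dygraph modes. The shape of that wrapper, stripped to its essentials with a placeholder body:

    import unittest

    from paddle.fluid.framework import _test_eager_guard

    class ExampleCase(unittest.TestCase):
        def func_something(self):
            self.assertTrue(True)        # placeholder for the real checks

        def test_something(self):
            with _test_eager_guard():
                self.func_something()    # eager mode
            self.func_something()        # legacy dygraph mode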
# copyright (c) 2018 paddlepaddle authors. all rights reserved.
# copyright (c) 2022 paddlepaddle authors. all rights reserved.
#
# licensed under the apache license, version 2.0 (the "license");
# you may not use this file except in compliance with the license.
......@@ -13,34 +13,25 @@
# limitations under the license.
import os
import numpy as np
import random
import unittest
import logging
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6
from paddle.nn import Linear, Conv2D, Softmax, BatchNorm
from paddle.fluid.log_helper import get_logger
import numpy as np
from imperative_test_utils import (
ImperativeLenetWithSkipQuant,
fix_model_dict,
train_lenet,
ImperativeLenetWithSkipQuant,
)
import paddle
from paddle.framework import core, set_flags
from paddle.optimizer import Adam
from paddle.quantization import ImperativeQuantAware
INFER_MODEL_SUFFIX = ".pdmodel"
INFER_PARAMS_SUFFIX = ".pdiparams"
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s'
)
set_flags({"FLAGS_cudnn_deterministic": True})
class TestImperativeOutSclae(unittest.TestCase):
......@@ -60,9 +51,7 @@ class TestImperativeOutSclae(unittest.TestCase):
lenet = fix_model_dict(lenet)
qat.quantize(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters()
)
adam = Adam(learning_rate=lr, parameters=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
loss_list = train_lenet(lenet, reader, adam)
......@@ -88,14 +77,14 @@ class TestImperativeOutSclae(unittest.TestCase):
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
exe = paddle.static.Executor(place)
[
inference_program,
feed_target_names,
fetch_targets,
] = fluid.io.load_inference_model(
dirname=save_dir,
] = paddle.static.load_inference_model(
save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX,
......
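Loading the saved model moves from fluid.io.load_inference_model(dirname=...) to paddle.static.load_inference_model, which takes the path positionally; model_filename and params_filename are still passed when the program and weights live in named files. A hedged sketch of the new call, assuming a model was previously saved under save_dir with the "lenet" prefix (both names are placeholders here):

    import paddle
    from paddle.framework import core

    paddle.enable_static()

    place = core.CUDAPlace(0) if core.is_compiled_with_cuda() else core.CPUPlace()
    exe = paddle.static.Executor(place)

    save_dir = "./qat_out"                       # placeholder directory
    [
        inference_program,
        feed_target_names,
        fetch_targets,
    ] = paddle.static.load_inference_model(
        save_dir,                                # path prefix, no longer dirname=...
        executor=exe,
        model_filename="lenet.pdmodel",
        params_filename="lenet.pdiparams",
    )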
......@@ -13,12 +13,12 @@
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
import paddle.nn.quant.quant_layers as quant_layers
from paddle.framework import core
paddle.enable_static()
......@@ -38,23 +38,23 @@ def init_data(batch_size=32, img_shape=[784], label_range=9):
class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
def check_backward(self, use_cuda):
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
image = fluid.layers.data(
name='image', shape=[784], dtype='float32'
main_program = paddle.static.Program()
startup_program = paddle.static.Program()
with paddle.static.program_guard(main_program, startup_program):
image = paddle.static.data(
name='image', shape=[-1, 784], dtype='float32'
)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
fc_tmp = fluid.layers.fc(image, size=10, act='softmax')
label = paddle.static.data(
name='label', shape=[-1, 1], dtype='int64'
)
fc_tmp = paddle.static.nn.fc(image, size=10, activation='softmax')
out_scale = quant_layers.MovingAverageAbsMaxScale(
name=fc_tmp.name, dtype=fc_tmp.dtype
)
fc_tmp_1 = out_scale(fc_tmp)
cross_entropy = paddle.nn.functional.softmax_with_cross_entropy(
fc_tmp, label
)
cross_entropy = paddle.nn.functional.cross_entropy(fc_tmp, label)
loss = paddle.mean(cross_entropy)
sgd = fluid.optimizer.SGD(learning_rate=1e-3)
sgd = paddle.optimizer.SGD(learning_rate=1e-3)
sgd.minimize(loss)
moving_average_abs_max_scale_ops = [
......@@ -66,13 +66,13 @@ class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
len(moving_average_abs_max_scale_ops) == 1
), "The number of moving_average_abs_max_scale_ops should be 1."
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = paddle.static.Executor(place)
exe.run(startup_program)
binary = fluid.compiler.CompiledProgram(
main_program
).with_data_parallel(loss_name=loss.name)
binary = paddle.static.CompiledProgram(main_program).with_data_parallel(
loss_name=loss.name
)
img, label = init_data()
feed_dict = {"image": img, "label": label}
......
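The MovingAverageAbsMaxScale test is rewritten against the paddle.static namespace: fluid.layers.data becomes paddle.static.data with an explicit -1 batch dimension, fluid.layers.fc(act=...) becomes paddle.static.nn.fc(activation=...), softmax_with_cross_entropy is replaced by paddle.nn.functional.cross_entropy, and programs, places, and executors come from paddle.static. A condensed sketch of that graph construction (sizes are illustrative):

    import paddle

    paddle.enable_static()

    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        image = paddle.static.data(name='image', shape=[-1, 784], dtype='float32')
        label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')
        probs = paddle.static.nn.fc(image, size=10, activation='softmax')
        # probs are already normalized, so skip the internal softmax.
        loss = paddle.nn.functional.cross_entropy(probs, label, use_softmax=False)

        sgd = paddle.optimizer.SGD(learning_rate=1e-3)
        sgd.minimize(loss)

    place = paddle.CUDAPlace(0) if paddle.is_compiled_with_cuda() else paddle.CPUPlace()
    exe = paddle.static.Executor(place)
    exe.run(startup_program)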
This diff is collapsed.
This diff is collapsed.
......@@ -486,7 +486,7 @@ def get_filenames(full_test=False):
'''
global whl_error
import paddle # noqa: F401
import paddle.fluid.contrib.slim.quantization # noqa: F401
import paddle.static.quantization # noqa: F401
whl_error = []
if full_test:
......