Unverified commit 6bbb6e7f authored by guofei, committed by GitHub

Implement the function of OutScaleForTraining/OutScaleForInference in dygraph (#26601)

* Implement the function of OutScaleForTraining/OutScaleForInference in dygraph

test=develop
Parent 0140d74e
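In brief: this patch adds an `ImperativeCalcOutScale` helper that registers forward-post hooks on selected dygraph layers, tracks the moving-average abs-max scale of each layer's output during forward passes, and writes the collected scales into the saved inference program as `out_threshold` op attributes. A minimal sketch of the intended workflow (the model and paths below are illustrative, not part of the patch):

    import paddle
    from paddle.fluid.contrib.slim.quantization import ImperativeCalcOutScale

    out_scale = ImperativeCalcOutScale(moving_rate=0.9)
    out_scale.calc_out_scale(model)  # `model` is any trained dygraph.Layer
    # ... run forward/training passes so the output scales are collected ...
    out_scale.save_quantized_model(
        layer=model,
        path="./outscale_infer_model/lenet",
        input_spec=[
            paddle.static.InputSpec(
                shape=[None, 1, 28, 28], dtype='float32')
        ])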
@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h"
#include "paddle/fluid/framework/op_version_registry.h"
@@ -51,6 +51,7 @@ std::map<std::string, std::set<std::string>> op_ins_map = {
{"warpctc", {"Logits", "Label", "LogitsLength", "LabelLength"}},
{"hierarchical_sigmoid",
{"X", "W", "Label", "PathTable", "PathCode", "Bias"}},
{"moving_average_abs_max_scale", {"X", "InAccum", "InState"}},
};
// NOTE(zhiqiu): Like op_ins_map.
@@ -75,6 +76,7 @@ std::map<std::string, std::set<std::string>> op_outs_map = {
{"collect_fpn_proposals", {"FpnRois", "RoisNum"}},
{"distribute_fpn_proposals",
{"MultiFpnRois", "RestoreIndex", "MultiLevelRoIsNum"}},
{"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}},
};
// NOTE(zhiqiu): Commonly, the outputs in auto-generated OP function are
@@ -118,6 +120,7 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
{"check_finite_and_unscale", {"Out", "FoundInfinite"}},
{"update_loss_scaling",
{"Out", "LossScaling", "OutGoodSteps", "OutBadSteps"}},
{"moving_average_abs_max_scale", {"OutScale", "OutAccum", "OutState"}},
};
// clang-format off
@@ -15,18 +15,37 @@
import logging
import numpy as np
import sys
import os
import paddle
from paddle.fluid import dygraph
from paddle.fluid.dygraph.nn import Conv2D
from paddle.fluid.dygraph.nn import Linear
from paddle.fluid import dygraph, core, framework
from paddle.fluid.executor import Executor
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.fluid.dygraph.nn import Conv2D, Linear, BatchNorm, Pool2D, Conv2DTranspose
from paddle.fluid.io import load_inference_model, save_inference_model
from paddle.nn.layer.activation import ReLU, LeakyReLU, Sigmoid, ReLU6, Tanh, Softmax, PReLU
from paddle.fluid.log_helper import get_logger
from . import quant_nn
__all__ = ['ImperativeQuantAware']
__all__ = ['ImperativeQuantAware', 'ImperativeCalcOutScale']
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
_op_real_in_out_name = {
"conv2d": [["Input", "Filter"], ["Output"]],
"conv2d_transpose": [["Input", "Filter"], ["Output"]],
"pool2d": [["X"], ["Out"]],
"elementwise_add": [["X", "Y"], ["Out"]],
"softmax": [["X"], ["Out"]],
"relu": [["X"], ["Out"]],
"relu6": [["X"], ["Out"]],
"leaky_relu": [["X"], ["Out"]],
"prelu": [["X"], ["Out"]],
"tanh": [["X"], ["Out"]],
"batch_norm": [["X"], ["Y"]],
"sigmoid": [["X"], ["Out"]],
}
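# Maps each supported op type to its real [input slot names, output slot names].
# _get_op_output_names() below reads the output slots to locate the variables
# whose out_scale values should be recorded in the saved program.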
class ImperativeQuantAware(object):
"""
@@ -141,7 +160,6 @@ class ImperativeQuantAware(object):
for name, layer in model.named_sublayers():
if not isinstance(layer, self._quantizable_layer_type):
continue
scopes = name.split('.')
target = scopes[-1]
obj = model
@@ -173,3 +191,204 @@ class ImperativeQuantAware(object):
layer, self._weight_bits, self._activation_bits, self._moving_rate,
self._weight_quantize_type, self._activation_quantize_type)
return quantized_layer
class ImperativeCalcOutScale(object):
def __init__(self,
moving_rate=0.9,
target_layer_types=[
'BatchNorm', 'Conv2D', 'Conv2DTranspose', 'LeakyReLU',
'Linear', 'PReLU', 'Pool2D', 'ReLU', 'ReLU6', 'Sigmoid',
'Softmax', 'Tanh'
]):
"""
Add the logic of calculating and setting the output quantization scales for some layers.
These output quantization scales may be used by TensorRT or other inference engines.
Args:
moving_rate(float): The decay coefficient of the moving average. The default value is 0.9.
target_layer_types(list[str]): The types of layers whose output scales will be calculated.
Default is ['Conv2D', 'ReLU', 'PReLU', 'LeakyReLU', 'Linear', 'Sigmoid', 'BatchNorm', 'ReLU6', 'Tanh', 'Softmax', 'Conv2DTranspose']
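Examples:
    .. code-block:: python

        # A minimal sketch: restrict scale collection to a subset of
        # the supported layer types (any name listed above works).
        out_scale = ImperativeCalcOutScale(
            moving_rate=0.9,
            target_layer_types=['Conv2D', 'Linear', 'ReLU'])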
"""
super(ImperativeCalcOutScale, self).__init__()
self._moving_rate = moving_rate
self._out_scale_layers_map = {
'BatchNorm': BatchNorm,
'Conv2D': Conv2D,
'Conv2DTranspose': Conv2DTranspose,
'LeakyReLU': LeakyReLU,
'Linear': Linear,
'PReLU': PReLU,
'Pool2D': Pool2D,
'ReLU': ReLU,
'ReLU6': ReLU6,
'Sigmoid': Sigmoid,
'Softmax': Softmax,
'Tanh': Tanh
}
self._out_scale_layer_type = tuple(
self._out_scale_layers_map[layer]
if layer in self._out_scale_layers_map else layer
for layer in target_layer_types)
for layer in self._out_scale_layer_type:
assert not isinstance(
layer, str), "{} is not supported for out_scale calculation.".format(layer)
self._register_hook_handle_list = []
self._out_scale_dict = {}
def calc_out_scale(self, model):
"""
Insert the `moving_average_abs_max_scale` op to calculate the output scales of specific layers in the model.
Args:
model(fluid.dygraph.Layer): The target model whose output quantization scales will be calculated.
Returns:
None
"""
assert isinstance(
model, dygraph.Layer), "model must be an instance of dygraph.Layer"
for _, layer in model.named_sublayers():
if not isinstance(layer, self._out_scale_layer_type):
continue
forward_post_hook_handle = layer.register_forward_post_hook(
self._forward_post_hook)
self._register_hook_handle_list.append(forward_post_hook_handle)
# Get the output variable names of the op
def _get_op_output_names(self, op):
assert isinstance(
op, framework.Operator), "The input op should be Operator."
var_names = []
name_list = _op_real_in_out_name[op.type][1]
for name in name_list:
var_name = op.output(name)
if isinstance(var_name, list):
var_names.extend(var_name)
else:
var_names.append(var_name)
return var_names
def save_quantized_model(self, layer, path, input_spec=None, **config):
"""
Save the quantized model for inference.
Args:
layer (Layer): The Layer to be saved.
path (str): The path prefix to save model. The format is ``dirname/file_prefix`` or ``file_prefix``.
input_spec (list[InputSpec|Tensor], optional): Describes the input of the saved model's forward
method, which can be described by InputSpec or example Tensor. If None, all input variables of
the original Layer's forward method would be the inputs of the saved model. Default None.
**config (dict, optional): Other save configuration options for compatibility. We do not
recommend using these configurations; they may be removed in the future. If not necessary,
DO NOT use them. Default None.
The following options are currently supported:
(1) output_spec (list[Tensor]): Selects the output targets of the saved model.
By default, all return variables of original Layer's forward method are kept as the
output of the saved model. If the provided ``output_spec`` list does not contain all output variables,
the saved model will be pruned according to the given ``output_spec`` list.
Returns:
None
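Examples:
    .. code-block:: python

        # A minimal sketch (path illustrative); `lenet` is a trained
        # dygraph.Layer on which calc_out_scale() was called earlier.
        out_scale.save_quantized_model(
            layer=lenet,
            path="./outscale_infer_model/lenet",
            input_spec=[
                paddle.static.InputSpec(
                    shape=[None, 1, 28, 28], dtype='float32')
            ])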
"""
assert isinstance(
layer, dygraph.Layer), "layer must be an instance of dygraph.Layer"
with dygraph.guard():
layer.eval()
for handle in self._register_hook_handle_list:
handle.remove()
for key in self._out_scale_dict:
self._out_scale_dict[key] = float(self._out_scale_dict[key]
.numpy())
paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = Executor(place)
file_prefix = os.path.basename(path)
dirname = os.path.dirname(path)
model_filename = file_prefix + INFER_MODEL_SUFFIX
params_filename = file_prefix + INFER_PARAMS_SUFFIX
[inference_program, feed_target_names, fetch_targets] = (
load_inference_model(
dirname=dirname,
executor=exe,
model_filename=model_filename,
params_filename=params_filename))
# Traverse all ops in the program and find the ops that match
# the Layers in the dynamic graph.
layer_var_dict = {}
for block in inference_program.blocks:
for op in block.ops:
if op.type in _op_real_in_out_name:
output_var_names = self._get_op_output_names(op)
for output_var_name in output_var_names:
output_var_tensor = block.var(output_var_name)
if output_var_tensor.dtype not in [
core.VarDesc.VarType.FP64,
core.VarDesc.VarType.FP32
]:
continue
# A dygraph Layer may correspond to multiple ops in the static
# program after saving. To ensure correctness, the out_scale
# collected for the output of a dygraph Layer can only be set
# on the last of its corresponding ops in the static program.
#
# The execution order of the ops corresponding to a dygraph
# Layer can be judged from their output names, so a dict is used
# to record the mapping between each dygraph Layer and the
# static graph op whose out_threshold attribute must be set.
dynamic_layer_name, var_name_suffix = output_var_name.split(
".")
if dynamic_layer_name in layer_var_dict:
if layer_var_dict[dynamic_layer_name][
0] < var_name_suffix:
layer_var_dict[dynamic_layer_name] = [
var_name_suffix, op
]
else:
layer_var_dict[
dynamic_layer_name] = [var_name_suffix, op]
# The naming styles of the static and dynamic graphs differ, so
# the names are unified here to avoid mismatches.
for (layer_name, var_name_op_list) in layer_var_dict.items():
if 'prelu' in layer_name:
layer_name = layer_name.replace('prelu', 'p_re_lu')
if 'relu' in layer_name:
layer_name = layer_name.replace('relu', 're_lu')
if layer_name not in self._out_scale_dict:
continue
var_name_op_list[1]._set_attr('out_threshold',
self._out_scale_dict[layer_name])
# Save the processed program.
save_inference_model(
dirname=dirname,
feeded_var_names=feed_target_names,
target_vars=fetch_targets,
executor=exe,
main_program=inference_program.clone(),
model_filename=model_filename,
params_filename=params_filename)
def _forward_post_hook(self, layer, input, output):
assert isinstance(
output, core.VarBase
), "Multiple outputs are not currently supported in ImperativeCalcOutScale."
if output.dtype not in [
core.VarDesc.VarType.FP32, core.VarDesc.VarType.FP64
]:
return
if not hasattr(layer, "_out_scale"):
layer._out_scale = quant_nn.MovingAverageAbsMaxScale(
output.name, self._moving_rate, output.dtype)
scale_out = layer._out_scale(output)
self._out_scale_dict[layer.full_name()] = scale_out
@@ -24,7 +24,8 @@ from paddle.fluid.data_feeder import check_variable_and_dtype
__all__ = [
'FakeQuantMovingAverage', 'FakeQuantAbsMax', 'QuantizedConv2D',
'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax'
'QuantizedLinear', 'FakeChannelWiseQuantDequantAbsMax',
'MovingAverageAbsMaxScale'
]
@@ -494,3 +495,78 @@ class QuantizedLinear(layers.Layer):
else:
pre_activation = mul_out
return self._helper.append_activation(pre_activation, act=self._act)
class MovingAverageAbsMaxScale(layers.Layer):
def __init__(self, name=None, moving_rate=0.9, dtype='float32'):
"""
MovingAverageAbsMaxScale layer is used to calculate the output quantization scale of a Layer.
Its computational formula is described below:
:math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
:math:`Out = X`
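Examples:
    .. code-block:: python

        # A minimal dygraph sketch; the input tensor is illustrative.
        import numpy as np
        import paddle.fluid as fluid
        from paddle.fluid.contrib.slim.quantization.imperative import quant_nn

        with fluid.dygraph.guard():
            x = fluid.dygraph.to_variable(
                np.random.rand(4, 10).astype('float32'))
            scale_layer = quant_nn.MovingAverageAbsMaxScale(name='fc_0.tmp_0')
            out_scale = scale_layer(x)  # updates and returns the scale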
"""
super(MovingAverageAbsMaxScale, self).__init__()
self._moving_rate = moving_rate
self._dtype = dtype
scale_prefix = '{}.scale'.format(name) if name else 'outscale.scale'
name = unique_name.generate(scale_prefix)
scale_attr = ParamAttr(
name=name, initializer=Constant(1), trainable=False)
self._scale = self.create_parameter(
shape=[1], attr=scale_attr, dtype=self._dtype)
self._scale.stop_gradient = True
state_prefix = "{}.state".format(name) if name else 'outscale.state'
state_attr = ParamAttr(
name=unique_name.generate(state_prefix),
initializer=Constant(1),
trainable=False)
self._state = self.create_parameter(
shape=[1], attr=state_attr, dtype=self._dtype)
self._state.stop_gradient = True
accum_prefix = "{}.accum".format(name) if name else 'outscale.accum'
accum_attr = ParamAttr(
name=unique_name.generate(accum_prefix),
initializer=Constant(1),
trainable=False)
self._accum = self.create_parameter(
shape=[1], attr=accum_attr, dtype=self._dtype)
self._accum.stop_gradient = True
MovingAverageAbsMaxScale._has_create = True
def forward(self, input):
if in_dygraph_mode():
attrs = ('moving_rate', self._moving_rate, 'is_test',
not self.training)
state = self._state if self.training else None
accum = self._accum if self.training else None
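# The auto-generated op entry takes the input tensors first and the
# pre-created output tensors after them (see the op_ins_map /
# op_passing_outs_map entries registered in this patch); in eval mode
# None is passed for state/accum so the running statistics stay fixed.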
out_scale, _, _ = core.ops.moving_average_abs_max_scale(
input, accum, state, self._scale, state, accum, *attrs)
return out_scale
check_variable_and_dtype(input, 'input', ['float32', 'float64'],
'MovingAverageAbsMaxScale')
scale_out = self._scale
attrs = {'moving_rate': self._moving_rate, 'is_test': not self.training}
inputs = {"X": [input]}
outputs = {"OutScale": [scale_out]}
if self.training:
inputs['InState'] = [self._state]
inputs['InAccum'] = [self._accum]
outputs['OutState'] = [self._state]
outputs['OutAccum'] = [self._accum]
self._helper.append_op(
type="moving_average_abs_max_scale",
inputs=inputs,
outputs=outputs,
attrs=attrs)
return scale_out
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import numpy as np
import random
import unittest
import logging
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import ImperativeCalcOutScale
from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass
from paddle.fluid.dygraph.container import Sequential
from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX
from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, ReLU6
from paddle.fluid.dygraph.nn import BatchNorm, Conv2D, Linear, Pool2D
from paddle.fluid.log_helper import get_logger
paddle.enable_static()
os.environ["CPU_NUM"] = "1"
if core.is_compiled_with_cuda():
fluid.set_flags({"FLAGS_cudnn_deterministic": True})
_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')
def StaticLenet(data, num_classes=10, classifier_activation='softmax'):
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
conv1 = fluid.layers.conv2d(
data,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr)
batch_norm1 = layers.batch_norm(conv1)
relu1 = layers.relu(batch_norm1)
pool1 = fluid.layers.pool2d(
relu1, pool_size=2, pool_type='max', pool_stride=2)
conv2 = fluid.layers.conv2d(
pool1,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr)
batch_norm2 = layers.batch_norm(conv2)
relu6_1 = layers.relu6(batch_norm2)
pool2 = fluid.layers.pool2d(
relu6_1, pool_size=2, pool_type='max', pool_stride=2)
fc1 = fluid.layers.fc(input=pool2,
size=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr)
leaky_relu1 = layers.leaky_relu(fc1, alpha=0.01)
fc2 = fluid.layers.fc(input=leaky_relu1,
size=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr)
sigmoid1 = layers.sigmoid(fc2)
fc3 = fluid.layers.fc(input=sigmoid1,
size=num_classes,
act=classifier_activation,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr)
return fc3
class ImperativeLenet(fluid.dygraph.Layer):
def __init__(self, num_classes=10, classifier_activation='softmax'):
super(ImperativeLenet, self).__init__()
conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1")
conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2")
fc_w1_attr = fluid.ParamAttr(name="fc_w_1")
fc_w2_attr = fluid.ParamAttr(name="fc_w_2")
fc_w3_attr = fluid.ParamAttr(name="fc_w_3")
conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1")
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2")
fc_b1_attr = fluid.ParamAttr(name="fc_b_1")
fc_b2_attr = fluid.ParamAttr(name="fc_b_2")
fc_b3_attr = fluid.ParamAttr(name="fc_b_3")
self.features = Sequential(
Conv2D(
num_channels=1,
num_filters=6,
filter_size=3,
stride=1,
padding=1,
param_attr=conv2d_w1_attr,
bias_attr=conv2d_b1_attr),
BatchNorm(6),
ReLU(),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2),
Conv2D(
num_channels=6,
num_filters=16,
filter_size=5,
stride=1,
padding=0,
param_attr=conv2d_w2_attr,
bias_attr=conv2d_b2_attr),
BatchNorm(16),
ReLU6(),
Pool2D(
pool_size=2, pool_type='max', pool_stride=2))
self.fc = Sequential(
Linear(
input_dim=400,
output_dim=120,
param_attr=fc_w1_attr,
bias_attr=fc_b1_attr),
LeakyReLU(),
Linear(
input_dim=120,
output_dim=84,
param_attr=fc_w2_attr,
bias_attr=fc_b2_attr),
Sigmoid(),
Linear(
input_dim=84,
act=classifier_activation,
output_dim=num_classes,
param_attr=fc_w3_attr,
bias_attr=fc_b3_attr))
def forward(self, inputs):
x = self.features(inputs)
x = fluid.layers.flatten(x, 1)
x = self.fc(x)
return x
class TestImperativeOutScale(unittest.TestCase):
def test_calc_out_scale_save(self):
imperative_out_scale = ImperativeCalcOutScale()
with fluid.dygraph.guard():
lenet = ImperativeLenet()
adam = AdamOptimizer(
learning_rate=0.001, parameter_list=lenet.parameters())
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=32, drop_last=True)
test_reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32)
imperative_out_scale.calc_out_scale(lenet)
epoch_num = 1
for epoch in range(epoch_num):
lenet.train()
for batch_id, data in enumerate(train_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc = fluid.layers.accuracy(out, label)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
if batch_id % 100 == 0:
_logger.info(
"Train | At epoch {} step {}: loss = {:}, acc= {:}".
format(epoch, batch_id,
avg_loss.numpy(), acc.numpy()))
lenet.eval()
for batch_id, data in enumerate(test_reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
acc_top1 = fluid.layers.accuracy(
input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=out, label=label, k=5)
if batch_id % 100 == 0:
_logger.info(
"Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}".
format(epoch, batch_id,
acc_top1.numpy(), acc_top5.numpy()))
# save weights
model_dict = lenet.state_dict()
fluid.save_dygraph(model_dict, "save_temp")
# test the correctness of `save_quantized_model`
data = next(test_reader())
test_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
test_img = fluid.dygraph.to_variable(test_data)
lenet.eval()
before_save = lenet(test_img)
# save inference quantized model
path = "./outscale_infer_model/lenet"
save_dir = "./outscale_infer_model"
imperative_out_scale.save_quantized_model(
layer=lenet,
path=path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
[inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
after_save, = exe.run(inference_program,
feed={feed_target_names[0]: test_data},
fetch_list=fetch_targets)
self.assertTrue(
np.allclose(after_save, before_save.numpy()),
msg='Failed to save the inference quantized model.')
def test_out_scale_acc(self):
def _build_static_lenet(main, startup, is_test=False, seed=1000):
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
main.random_seed = seed
startup.random_seed = seed
img = fluid.layers.data(
name='image', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(
name='label', shape=[1], dtype='int64')
prediction = StaticLenet(img)
if not is_test:
loss = fluid.layers.cross_entropy(
input=prediction, label=label)
avg_loss = fluid.layers.mean(loss)
else:
avg_loss = prediction
return img, label, avg_loss
reader = paddle.batch(
paddle.dataset.mnist.test(), batch_size=32, drop_last=True)
param_init_map = {}
seed = 1000
lr = 0.1
dynamic_out_scale_list = []
static_out_scale_list = []
# imperative train
_logger.info(
"--------------------------dynamic graph qat--------------------------"
)
imperative_out_scale = ImperativeCalcOutScale()
with fluid.dygraph.guard():
np.random.seed(seed)
fluid.default_main_program().random_seed = seed
fluid.default_startup_program().random_seed = seed
lenet = ImperativeLenet()
fixed_state = {}
for name, param in lenet.named_parameters():
p_shape = param.numpy().shape
p_value = param.numpy()
if name.endswith("bias"):
value = np.zeros_like(p_value).astype('float32')
else:
value = np.random.normal(
loc=0.0, scale=0.01, size=np.product(p_shape)).reshape(
p_shape).astype('float32')
fixed_state[name] = value
param_init_map[param.name] = value
lenet.set_dict(fixed_state)
imperative_out_scale.calc_out_scale(lenet)
adam = AdamOptimizer(
learning_rate=lr, parameter_list=lenet.parameters())
dynamic_loss_rec = []
lenet.train()
for batch_id, data in enumerate(reader()):
x_data = np.array([x[0].reshape(1, 28, 28)
for x in data]).astype('float32')
y_data = np.array(
[x[1] for x in data]).astype('int64').reshape(-1, 1)
img = fluid.dygraph.to_variable(x_data)
label = fluid.dygraph.to_variable(y_data)
out = lenet(img)
loss = fluid.layers.cross_entropy(out, label)
avg_loss = fluid.layers.mean(loss)
avg_loss.backward()
adam.minimize(avg_loss)
lenet.clear_gradients()
dynamic_loss_rec.append(avg_loss.numpy()[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', avg_loss.numpy()))
lenet.eval()
op_object_list = (Conv2D, ReLU, ReLU6, LeakyReLU, Sigmoid, Pool2D,
BatchNorm)
path = "./dynamic_outscale_infer_model/lenet"
save_dir = "./dynamic_outscale_infer_model"
imperative_out_scale.save_quantized_model(
layer=lenet,
path=path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 1, 28, 28], dtype='float32')
])
_logger.info(
"--------------------------static graph qat--------------------------"
)
static_loss_rec = []
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
exe = fluid.Executor(place)
main = fluid.Program()
infer = fluid.Program()
startup = fluid.Program()
static_img, static_label, static_loss = _build_static_lenet(
main, startup, False, seed)
infer_img, _, infer_pre = _build_static_lenet(infer, startup, True,
seed)
with fluid.unique_name.guard():
with fluid.program_guard(main, startup):
opt = AdamOptimizer(learning_rate=lr)
opt.minimize(static_loss)
scope = core.Scope()
with fluid.scope_guard(scope):
exe.run(startup)
for param in main.all_parameters():
param_tensor = scope.var(param.name).get_tensor()
param_tensor.set(param_init_map[param.name], place)
main_graph = IrGraph(core.Graph(main.desc), for_test=False)
infer_graph = IrGraph(core.Graph(infer.desc), for_test=True)
transform_pass = OutScaleForTrainingPass(scope=scope, place=place)
transform_pass.apply(main_graph)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
loss_name=static_loss.name, build_strategy=build_strategy)
feeder = fluid.DataFeeder(
feed_list=[static_img, static_label], place=place)
with fluid.scope_guard(scope):
for batch_id, data in enumerate(reader()):
loss_v, = exe.run(binary,
feed=feeder.feed(data),
fetch_list=[static_loss])
static_loss_rec.append(loss_v[0])
if batch_id % 100 == 0:
_logger.info('{}: {}'.format('loss', loss_v))
scale_inference_pass = OutScaleForInferencePass(scope=scope)
scale_inference_pass.apply(infer_graph)
out_scale_op_list = [
"batch_norm", "conv2d", "leaky_relu", "pool2d", "relu6", "relu",
"sigmoid", "tanh", "softmax", "conv2d_transpose",
"elementwise_add"
]
op_nodes = infer_graph.all_op_nodes()
for op_node in op_nodes:
if op_node.name() in out_scale_op_list:
static_out_scale_list.append(op_node.op().attr("out_threshold"))
save_program = infer_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model("./static_mnist", [infer_img.name],
[infer_pre], exe, save_program)
rtol = 1e-05
atol = 1e-08
for i, (loss_d,
loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)):
diff = np.abs(loss_d - loss_s)
if diff > (atol + rtol * np.abs(loss_s)):
_logger.info(
"diff({}) at {}, dynamic loss = {}, static loss = {}".
format(diff, i, loss_d, loss_s))
break
self.assertTrue(
np.allclose(
np.array(dynamic_loss_rec),
np.array(static_loss_rec),
rtol=rtol,
atol=atol,
equal_nan=True),
msg='Failed to do the imperative out_scale.')
# load dynamic model
[inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(
dirname=save_dir,
executor=exe,
model_filename="lenet" + INFER_MODEL_SUFFIX,
params_filename="lenet" + INFER_PARAMS_SUFFIX))
global_block = inference_program.global_block()
for op in global_block.ops:
if op.has_attr('out_threshold'):
dynamic_out_scale_list.append(op.attr('out_threshold'))
check_list = [
False for item in dynamic_out_scale_list
if item not in static_out_scale_list
]
self.assertTrue(len(check_list) == 0)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.contrib.slim.quantization.imperative import quant_nn
paddle.enable_static()
def init_data(batch_size=32, img_shape=[784], label_range=9):
np.random.seed(5)
assert isinstance(img_shape, list)
input_shape = [batch_size] + img_shape
img = np.random.random(size=input_shape).astype(np.float32)
label = np.array(
[np.random.randint(0, label_range) for _ in range(batch_size)]).reshape(
(-1, 1)).astype("int64")
return img, label
class TestMovingAverageAbsMaxScaleOp(unittest.TestCase):
def check_backward(self, use_cuda):
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
image = fluid.layers.data(
name='image', shape=[784], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
fc_tmp = fluid.layers.fc(image, size=10, act='softmax')
out_scale = quant_nn.MovingAverageAbsMaxScale(
name=fc_tmp.name, dtype=fc_tmp.dtype)
fc_tmp_1 = out_scale(fc_tmp)
cross_entropy = fluid.layers.softmax_with_cross_entropy(fc_tmp,
label)
loss = fluid.layers.reduce_mean(cross_entropy)
sgd = fluid.optimizer.SGD(learning_rate=1e-3)
sgd.minimize(loss)
moving_average_abs_max_scale_ops = [
op for op in main_program.blocks[0].ops
if op.type == u'moving_average_abs_max_scale'
]
assert len(
moving_average_abs_max_scale_ops
) == 1, "The number of moving_average_abs_max_scale_ops should be 1."
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
binary = fluid.compiler.CompiledProgram(
main_program).with_data_parallel(loss_name=loss.name)
img, label = init_data()
feed_dict = {"image": img, "label": label}
res = exe.run(binary, feed_dict)
def test_fw_bw(self):
if core.is_compiled_with_cuda():
self.check_backward(use_cuda=True)
self.check_backward(use_cuda=False)
if __name__ == '__main__':
unittest.main()