Commit c64f2204 authored by WangZhen

Add ConvertToInt8Pass and TransformForMobilePass, along with their unit tests.

Parent c8095eeb
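For context, a minimal sketch of how the two new passes slot into the existing quantization pipeline. This is hedged, not part of the commit: `program` stands for a trained Program, and the setup lines are illustrative.

import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import (
    QuantizationTransformPass, QuantizationFreezePass, ConvertToInt8Pass,
    TransformForMobilePass)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
scope = fluid.global_scope()
# `program` is assumed to be a trained Program (illustrative).
test_graph = IrGraph(core.Graph(program.desc), for_test=True)

# 1. Insert fake quant/dequant ops, then fold them for inference.
QuantizationTransformPass(scope=scope, program_exe=exe).apply(test_graph)
QuantizationFreezePass(scope=scope, place=place).apply(test_graph)
# 2. Store conv/mul weights as INT8 tensors.
ConvertToInt8Pass(scope=scope, place=place).apply(test_graph)
# 3. Rewrite fake quant/dequant ops into real quantize/dequantize ops.
TransformForMobilePass().apply(test_graph)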
@@ -21,7 +21,10 @@ from ....framework import Program
from ....initializer import Constant
from .... import unique_name
__all__ = ['QuantizationTransformPass', 'QuantizationFreezePass']
__all__ = [
'QuantizationTransformPass', 'QuantizationFreezePass', 'ConvertToInt8Pass',
'TransformForMobilePass'
]
class QuantizationTransformPass(object):
@@ -394,6 +397,7 @@ class QuantizationFreezePass(object):
# remove the unused var nodes in the graph
self._remove_unused_var_nodes(graph)
return graph
def _remove_fake_quant_and_dequant_op(self, graph, op_node):
k = op_node.op().output('Out')[0]
@@ -453,9 +457,9 @@ class QuantizationFreezePass(object):
def _load_var(self, name):
return np.array(self._scope.find_var(name).get_tensor())
def _restore_var(self, name, arr):
t = self._scope.find_var(name).get_tensor()
t.set(arr, self._place)
def _restore_var(self, name, array):
tensor = self._scope.find_var(name).get_tensor()
tensor.set(array, self._place)
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
@@ -496,3 +500,97 @@ class QuantizationFreezePass(object):
def _quant(self, x, scale, num_bits):
return np.round(x / scale * ((1 << (num_bits - 1)) - 1))
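# Worked example (illustrative values, not from this commit): with
# num_bits=8 the multiplier is (1 << 7) - 1 = 127, so
# _quant(0.5, scale=1.0, num_bits=8) computes
# np.round(0.5 / 1.0 * 127) = np.round(63.5) = 64.0
# (numpy rounds halfway cases to the nearest even integer).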
class ConvertToInt8Pass(object):
def __init__(self, scope, place):
assert scope is not None, \
'The scope cannot be None.'
assert place is not None, \
'The place cannot be None.'
self._scope = scope
self._place = place
self._quantizable_ops = ['conv2d', 'depthwise_conv2d', 'mul']
def apply(self, graph):
persistable_vars = [p.name() for p in graph.all_persistable_vars()]
ops = graph.all_ops()
input_map = {}
for op_node in ops:
op_name = op_node.name()
if op_name in self._quantizable_ops:
for var_node in op_node.inputs:
name = var_node.name()
if name in persistable_vars:
if name not in input_map:
int8_var_node = self._convert_to_int8(graph,
var_node)
input_map[name] = int8_var_node
graph.update_input_link(var_node, input_map[name],
op_node)
# remove the unused var nodes in the graph
self._remove_unused_var_nodes(graph)
return graph
def _convert_to_int8(self, graph, var_node):
int8_var_node_name = var_node.name() + ".int8"
int8_var_node = graph.create_param_node(
name=cpt.to_text(int8_var_node_name),
var_type=var_node.var().type(),
shape=var_node.var().shape(),
var_dtype=core.VarDesc.VarType.INT8)
array = self._load_var(var_node.name())
self._scope.var(int8_var_node_name)
self._store_var(int8_var_node_name, array, np.int8)
return int8_var_node
def _load_var(self, name):
return np.array(self._scope.find_var(name).get_tensor())
def _store_var(self, name, array, dtype):
tensor = self._scope.find_var(name).get_tensor()
tensor.set(array.astype(dtype), self._place)
def _remove_unused_var_nodes(self, graph):
all_used_vars = set()
ops = graph.all_ops()
for op_node in ops:
for input_node in op_node.inputs:
all_used_vars.add(input_node)
for output_node in op_node.outputs:
all_used_vars.add(output_node)
all_unused_vars = graph.all_vars() - all_used_vars
graph.safe_remove_nodes(all_unused_vars)
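# A minimal usage sketch for ConvertToInt8Pass (hedged: `scope`, `place`,
# and `test_graph` are assumed to come from the surrounding setup):
#
#   convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
#   int8_graph = convert_int8_pass.apply(test_graph)
#
# Every persistable weight feeding a conv2d/depthwise_conv2d/mul op is
# replaced by a `<name>.int8` INT8 param node, and the now-unused FP32
# var nodes are removed from the graph.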
class TransformForMobilePass(object):
def __init__(self):
self._fake_quant_op_names = [
'fake_quantize_abs_max', 'fake_quantize_range_abs_max'
]
self._fake_dequant_op_names = ['fake_dequantize_max_abs']
def apply(self, graph):
ops = graph.all_ops()
for op_node in ops:
name = op_node.name()
if name in self._fake_quant_op_names:
op_node.op().set_type('quantize')
quant_node = graph.create_op_node_from_desc(op_node.op())
for input_node in op_node.inputs:
graph.link_to(input_node, quant_node)
for output_node in op_node.outputs:
graph.link_to(quant_node, output_node)
graph.safe_remove_nodes(op_node)
if name in self._fake_dequant_op_names:
op_node.op().set_type('dequantize')
dequant_node = graph.create_op_node_from_desc(op_node.op())
for input_node in op_node.inputs:
graph.link_to(input_node, dequant_node)
for output_node in op_node.outputs:
graph.link_to(dequant_node, output_node)
graph.safe_remove_nodes(op_node)
return graph
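# A minimal usage sketch for TransformForMobilePass (hedged: `test_graph`
# is assumed to be an already-frozen IrGraph, as in the test below):
#
#   mobile_pass = TransformForMobilePass()
#   mobile_graph = mobile_pass.apply(test_graph)
#
# Each fake_quantize_* / fake_dequantize_* op is re-created as a real
# `quantize` / `dequantize` op with the same inputs and outputs, which a
# mobile runtime can execute directly.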
@@ -18,10 +18,11 @@ import numpy as np
import paddle.fluid as fluid
import six
import paddle
from paddle.fluid.framework import Program
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid import core
@@ -233,10 +234,22 @@ class TestQuantizationFreezePass(unittest.TestCase):
scope=scope, program_exe=exe, activation_quantize_type=quant_type)
transform_pass.apply(main_graph)
transform_pass.apply(test_graph)
dev_name = '_gpu_' if use_cuda else '_cpu_'
marked_nodes = set()
for op in main_graph.all_ops():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
marked_nodes = set()
for op in test_graph.all_ops():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)
quantized_main_program = main_graph.to_program()
quantized_test_program = test_graph.to_program()
iters = 5
batch_size = 8
dev_name = '_gpu_' if use_cuda else '_cpu_'
train_reader = paddle.batch(
paddle.reader.shuffle(
@@ -248,66 +261,86 @@ class TestQuantizationFreezePass(unittest.TestCase):
with fluid.scope_guard(scope):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(program=main_graph.to_program(),
loss_v = exe.run(program=quantized_main_program,
feed=feeder.feed(data),
fetch_list=[loss])
print('{}: {}'.format(dev_name, loss_v))
print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
test_data = next(test_reader())
with fluid.program_guard(quantized_test_program):
w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
quantized_test_program)
# Testing
with fluid.scope_guard(scope):
test_loss1, w_quant = exe.run(program=quantized_test_program,
feed=feeder.feed(test_data),
fetch_list=[loss, w_var])
# Freeze graph for inference, but the weights of fc/conv are still float type.
freeze_pass = QuantizationFreezePass(scope=scope, place=place)
freeze_pass.apply(test_graph)
marked_nodes = set()
for op in main_graph.all_ops():
for op in test_graph.all_ops():
if op.name().find('quantize') > -1:
marked_nodes.add(op)
main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
marked_nodes)
freeze_pass = QuantizationFreezePass(scope=scope, place=place)
origin_marked_nodes = set()
server_program = test_graph.to_program()
with fluid.scope_guard(scope):
test_loss2, = exe.run(program=server_program,
feed=feeder.feed(test_data),
fetch_list=[loss])
self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
print('{}: {}'.format('test_loss1' + dev_name + quant_type, test_loss1))
print('{}: {}'.format('test_loss2' + dev_name + quant_type, test_loss2))
w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
# This may fail due to the limited calculation precision.
self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
print('{}: {}'.format('w_freeze' + dev_name + quant_type,
np.sum(w_freeze)))
print('{}: {}'.format('w_quant' + dev_name + quant_type,
np.sum(w_quant)))
# Convert parameters to 8-bit.
convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
convert_int8_pass.apply(test_graph)
marked_nodes = set()
for op in test_graph.all_ops():
if op.name().find('quantize') > -1:
origin_marked_nodes.add(op)
test_graph.draw('.', 'test_origin' + dev_name + quant_type,
origin_marked_nodes)
freeze_pass.apply(test_graph)
freeze_marked_nodes = set()
marked_nodes.add(op)
test_graph.draw('.', 'test_int8' + dev_name + quant_type, marked_nodes)
server_program_int8 = test_graph.to_program()
# Save the 8-bit parameters and model file.
with fluid.scope_guard(scope):
fluid.io.save_inference_model('server_int8' + dev_name + quant_type,
['image', 'label'], [loss], exe,
server_program_int8)
# Test whether the 8-bit parameters and model file can be loaded successfully.
[infer, feed, fetch] = fluid.io.load_inference_model(
'server_int8' + dev_name + quant_type, exe)
# Check the loaded 8-bit weight.
w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
self.assertEqual(w_8bit.dtype, np.int8)
self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
print('{}: {}'.format('w_8bit' + dev_name + quant_type, np.sum(w_8bit)))
print('{}: {}'.format('w_freeze' + dev_name + quant_type,
np.sum(w_freeze)))
mobile_pass = TransformForMobilePass()
mobile_pass.apply(test_graph)
marked_nodes = set()
for op in test_graph.all_ops():
if op.name().find('quantize') > -1:
freeze_marked_nodes.add(op)
test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
freeze_marked_nodes)
# with fluid.program_guard(test_program):
# test_data = next(test_reader())
# w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
# test_program)
# # Testing during training
# test_loss1, w_quant = exe.run(program=test_program,
# feed=feeder.feed(test_data),
# fetch_list=[loss, w_var])
# # Freeze program for inference, but the weight of fc/conv is still float type.
# quant_transpiler.freeze_program(test_program, place)
# test_loss2, = exe.run(program=test_program,
# feed=feeder.feed(test_data),
# fetch_list=[loss])
# self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
# w_freeze = np.array(fluid.global_scope().find_var('conv2d_1.w_0')
# .get_tensor())
# # fail: -432.0 != -433.0, this is due to the calculation precision
# #self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
# # Convert parameter to 8-bit.
# quant_transpiler.convert_to_int8(test_program, place)
# # Save the 8-bit parameter and model file.
# fluid.io.save_inference_model('model_8bit', ['image', 'label'],
# [loss], exe, test_program)
# # Test whether the 8-bit parameter and model file can be loaded successfully.
# [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit',
# exe)
# # Check the loaded 8-bit weight.
# w_8bit = np.array(fluid.global_scope().find_var('conv2d_1.w_0.int8')
# .get_tensor())
# self.assertEqual(w_8bit.dtype, np.int8)
# self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
marked_nodes.add(op)
test_graph.draw('.', 'test_mobile' + dev_name + quant_type,
marked_nodes)
mobile_program = test_graph.to_program()
with fluid.scope_guard(scope):
fluid.io.save_inference_model('mobile_int8' + dev_name + quant_type,
['image', 'label'], [loss], exe,
mobile_program)
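# A follow-up load check for the mobile model could mirror the
# server_int8 one above (hypothetical, not part of this commit):
#
#   [infer, feed, fetch] = fluid.io.load_inference_model(
#       'mobile_int8' + dev_name + quant_type, exe)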
def test_freeze_program_cuda_dynamic(self):
if fluid.core.is_compiled_with_cuda():
......
@@ -204,9 +204,11 @@ class TestQuantizeTranspiler(unittest.TestCase):
build_program(test_program, startup, True)
test_program = test_program.clone(for_test=True)
quant_transpiler = QuantizeTranspiler()
quant_transpiler.training_transpile(main)
quant_transpiler.training_transpile(test_program)
quant_type = 'abs_max'
quant_transpiler = QuantizeTranspiler(
activation_quantize_type=quant_type)
quant_transpiler.training_transpile(main, startup)
quant_transpiler.training_transpile(test_program, startup)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
@@ -223,12 +225,14 @@ class TestQuantizeTranspiler(unittest.TestCase):
paddle.dataset.mnist.test(), batch_size=batch_size)
feeder = fluid.DataFeeder(feed_list=feeds, place=place)
dev_name = '_gpu_' if use_cuda else '_cpu_'
with fluid.program_guard(main):
for _ in range(iters):
data = next(train_reader())
loss_v = exe.run(program=main,
feed=feeder.feed(data),
fetch_list=[loss])
print('{}: {}'.format('loss' + dev_name + quant_type, loss_v))
with fluid.program_guard(test_program):
test_data = next(test_reader())
@@ -245,11 +249,19 @@ class TestQuantizeTranspiler(unittest.TestCase):
feed=feeder.feed(test_data),
fetch_list=[loss])
self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
print('{}: {}'.format('test_loss1' + dev_name + quant_type,
test_loss1))
print('{}: {}'.format('test_loss2' + dev_name + quant_type,
test_loss2))
w_freeze = np.array(fluid.global_scope().find_var('conv2d_1.w_0')
.get_tensor())
# may fail: -432.0 != -433.0; this is due to the calculation precision
#self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
print('{}: {}'.format('w_freeze' + dev_name + quant_type,
np.sum(w_freeze)))
print('{}: {}'.format('w_quant' + dev_name + quant_type,
np.sum(w_quant)))
# Convert parameters to 8-bit.
quant_transpiler.convert_to_int8(test_program, place)
# Save the 8-bit parameters and model file.
@@ -264,13 +276,17 @@ class TestQuantizeTranspiler(unittest.TestCase):
self.assertEqual(w_8bit.dtype, np.int8)
self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
print('{}: {}'.format('w_8bit' + dev_name + quant_type,
np.sum(w_8bit)))
print('{}: {}'.format('w_freeze' + dev_name + quant_type,
np.sum(w_freeze)))
def not_test_freeze_program_cuda(self):
def test_freeze_program_cuda(self):
if fluid.core.is_compiled_with_cuda():
with fluid.unique_name.guard():
self.freeze_program(True, seed=1)
def not_test_freeze_program_cpu(self):
def test_freeze_program_cpu(self):
with fluid.unique_name.guard():
self.freeze_program(False, seed=2)
......