Unverified commit 7094251b, authored by Sylwester Fraczek, committed by GitHub

dequantize matmul and matmul_v2 Y weights in quant2_int8 (#37618)

* dequantize matmul and matmul_v2 Y weights in quant2_int8

* review fix

* split conv and mul tests, add matmul test

* fixup

* fix ci build

* remove unused variables

* formatting fix

* remove extra newline at end of file
Parent 6abe7dcb
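For context, the dequantization this commit extends to matmul/matmul_v2 Y weights maps int8-valued weights back to float by dividing by the int8 range (127) and scaling by the per-output-channel threshold collected during quantization. A minimal NumPy sketch of that arithmetic, consistent with the values asserted in the new test below (the variable names here are illustrative, not taken from the pass):

import numpy as np

# Weights stored as int8 values in a float tensor (3 x 5, all ones, as in the test)
weights = np.ones([3, 5], dtype=np.float32)
# One threshold per output channel, matching the test's mul_output_scale
thresholds = np.linspace(1, 5, num=5).astype(np.float32)

# Dequantize: divide by the int8 range (127) and scale each column by its threshold
dequantized = weights / 127.0 * thresholds
# Each column i now holds thresholds[i] / 127, which is what the test asserts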
@@ -69,7 +69,7 @@ class Quant2Int8MkldnnPass(object):
         self._mul_ops = ['mul']
         self._fc_ops = ['fc']
         self._relu_ops = ['relu', 'relu6']
-        self._matmul_ops = ['matmul']
+        self._matmul_ops = ['matmul', 'matmul_v2']
         self._gru_ops = ['fusion_gru', 'multi_gru']
         self._lstm_ops = ['fusion_lstm']
         self._weight_thresholds = {}
@@ -328,14 +328,18 @@ class Quant2Int8MkldnnPass(object):
     def _dequantize_weights(self, graph):
         def _is_int8_weights(op_node, weight_name):
             weight_var_name = op_node.input(weight_name)[0]
+            if self._scope.find_var(weight_var_name) is None:
+                return False
             weight = self._load_param(self._scope, weight_var_name)
             return np.all(np.mod(weight, 1) == 0)

+        mul_and_matmul_ops = self._mul_ops + self._matmul_ops
         for op in graph.all_op_nodes():
             if op.name() in self._conv_ops and _is_int8_weights(op, "Filter"):
                 self._dequantize_op_weights(graph, op, "Filter", "Output")
-            elif op.name() in self._mul_ops and _is_int8_weights(op, "Y"):
+            elif op.name() in mul_and_matmul_ops and _is_int8_weights(op, "Y"):
                 self._dequantize_op_weights(graph, op, "Y", "Out")
         return graph

     def _dequantize_op_weights(self, graph, op_node, weight_name, output_name):
...
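The _is_int8_weights helper above treats a tensor as quantized when every element is a whole number, i.e. int8 values stored in a float array; the new find_var guard simply skips weights that are not present in the scope. A standalone sketch of the whole-number check:

import numpy as np

def looks_like_int8_weights(weight):
    # int8-quantized weights are stored as floats but contain only whole numbers
    return bool(np.all(np.mod(weight, 1) == 0))

looks_like_int8_weights(np.array([[127., -3., 0.]]))   # True  -> will be dequantized
looks_like_int8_weights(np.array([[0.5, 1.25, 2.0]]))  # False -> already float weights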
@@ -23,7 +23,93 @@ import paddle
 paddle.enable_static()


-class TestQuant2Int8MkldnnPass(unittest.TestCase):
+class TestQuant2Int8MkldnnPassMul(unittest.TestCase):
+    def op_name(self):
+        return "mul"
+
+    def setUp(self):
+        self.scope = fluid.Scope()
+        self.place = fluid.CPUPlace()
+        self.dtype = np.float32
+        self.use_mkldnn = True
+        self.quantized_ops = self.op_name()
+        self.mul_input_size = [1, 3]
+        self.mul_weights_size = [3, 5]
+        self.mul_output_size = [1, 5]
+        self.mul_input = np.random.random(self.mul_input_size).astype(
+            self.dtype)
+        self.mul_weights = np.ones(self.mul_weights_size, self.dtype)
+        self.mul_weights_bad = np.ones([1, 1], self.dtype)
+        self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype)
+        self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype)
+
+        self.variables_mul = {
+            "mul_input": self.mul_input,
+            "mul_weights": self.mul_weights,
+            "mul_output": self.mul_output,
+            "mul_weights_bad": self.mul_weights_bad
+        }
+
+    def prepare_program_mul(self, program):
+        block = program.global_block()
+        for name in self.variables_mul:
+            block.create_var(
+                name=name,
+                dtype="float32",
+                shape=self.variables_mul[name].shape)
+
+        mul_op1 = block.append_op(
+            type=self.op_name(),
+            inputs={
+                "X": block.var('mul_input'),
+                "Y": block.var('mul_weights')
+            },
+            outputs={"Out": block.var('mul_output')},
+            attrs={'use_mkldnn': self.use_mkldnn})
+
+    def test_dequantize_op_weights(self):
+        program = fluid.Program()
+        with fluid.program_guard(program):
+            self.prepare_program_mul(program)
+            graph = IrGraph(core.Graph(program.desc), for_test=True)
+
+            op_node = ""
+            for op in graph.all_op_nodes():
+                if op.op().type() == self.op_name():
+                    op_node = op
+                    break
+            assert op_node != "", "op of type %s not found" % self.op_name()
+
+            qpass = Quant2Int8MkldnnPass(
+                self.quantized_ops,
+                _scope=self.scope,
+                _place=self.place,
+                _core=core,
+                _debug=False)
+            qpass._weight_thresholds["mul_output"] = self.mul_output_scale
+            param = self.scope.var("mul_weights").get_tensor()
+            param.set(self.variables_mul["mul_weights"], self.place)
+
+            qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
+
+            assert np.allclose(
+                self.scope.find_var("mul_weights").get_tensor(),
+                [[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
+                 [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
+                 [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]])
+
+            param = self.scope.var("mul_weights").get_tensor()
+            param.set(self.variables_mul["mul_weights_bad"], self.place)
+            with self.assertRaises(ValueError):
+                qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
+
+
+class TestQuant2Int8MkldnnPassMatmulV2(TestQuant2Int8MkldnnPassMul):
+    def op_name(self):
+        return "matmul_v2"
+
+
+class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase):
     def setUp(self):
         self.scope = fluid.Scope()
         self.place = fluid.CPUPlace()
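The matmul_v2 test above reuses the entire mul test body by overriding only op_name(), so covering another operator needs just one more small subclass. A hypothetical illustration of that pattern (this class is not part of the change shown here):

class TestQuant2Int8MkldnnPassMatmul(TestQuant2Int8MkldnnPassMul):
    # setUp, prepare_program_mul and test_dequantize_op_weights are inherited;
    # only the operator type appended to the test program differs
    def op_name(self):
        return "matmul"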
@@ -46,7 +132,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
         self.conv_output = np.ndarray(self.conv_output_size).astype(self.dtype)
         self.conv_output2 = np.ndarray(self.conv_output2_size).astype(
             self.dtype)
-        self.quantized_ops = 'conv2d,mul'
+        self.quantized_ops = 'conv2d'
         self.variables = {
             "input": self.input,
             "filter": self.filter,
@@ -54,24 +140,8 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
             "conv_output": self.conv_output,
             "conv_output2": self.conv_output2,
         }
-        self.mul_input_size = [1, 3]
-        self.mul_weights_size = [3, 5]
-        self.mul_output_size = [1, 5]
-        self.mul_input = np.random.random(self.mul_input_size).astype(
-            self.dtype)
-        self.mul_weights = np.ones(self.mul_weights_size, self.dtype)
-        self.mul_weights_bad = np.ones([1, 1], self.dtype)
-        self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype)
-        self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype)
-        self.variables_mul = {
-            "mul_input": self.mul_input,
-            "mul_weights": self.mul_weights,
-            "mul_output": self.mul_output,
-            "mul_weights_bad": self.mul_weights_bad
-        }
-
-    def prepare_program(self, program):
+    def prepare_program_conv2d(self, program):
         block = program.global_block()
         for name in self.variables:
             block.create_var(
@@ -111,23 +181,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
                 'fuse_brelu': True
             })

-    def prepare_program_mul(self, program):
-        block = program.global_block()
-        for name in self.variables_mul:
-            block.create_var(
-                name=name,
-                dtype="float32",
-                shape=self.variables_mul[name].shape)
-
-        mul_op1 = block.append_op(
-            type="mul",
-            inputs={
-                "X": block.var('mul_input'),
-                "Y": block.var('mul_weights')
-            },
-            outputs={"Out": block.var('mul_output')},
-            attrs={'use_mkldnn': self.use_mkldnn})
-
     def remove_fuse_activation_attribute(self, graph):
         for op in graph.all_op_nodes():
             op.op().remove_attr("fuse_activation")
@@ -150,7 +203,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
     def test_quant_update_activation(self):
         program = fluid.Program()
         with fluid.program_guard(program):
-            self.prepare_program(program)
+            self.prepare_program_conv2d(program)
             graph = IrGraph(core.Graph(program.desc), for_test=True)
             graph = self.remove_fuse_activation_attribute(graph)
             self.check_graph_before_pass(graph)
@@ -163,39 +216,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
             graph = quant2_int8_mkldnn_pass._update_activations(graph)
             self.check_graph_after_pass(graph)

-    def test_dequantize_op_weights(self):
-        program = fluid.Program()
-        with fluid.program_guard(program):
-            self.prepare_program_mul(program)
-            graph = IrGraph(core.Graph(program.desc), for_test=True)
-
-            for op in graph.all_op_nodes():
-                if op.op().type() == "mul":
-                    op_node = op
-                    break
-
-            qpass = Quant2Int8MkldnnPass(
-                self.quantized_ops,
-                _scope=self.scope,
-                _place=self.place,
-                _core=core,
-                _debug=False)
-            qpass._weight_thresholds["mul_output"] = self.mul_output_scale
-            param = self.scope.var("mul_weights").get_tensor()
-            param.set(self.variables_mul["mul_weights"], self.place)
-
-            qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
-
-            assert np.allclose(
-                self.scope.find_var("mul_weights").get_tensor(),
-                [[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
-                 [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
-                 [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]])
-
-            param = self.scope.var("mul_weights").get_tensor()
-            param.set(self.variables_mul["mul_weights_bad"], self.place)
-            with self.assertRaises(ValueError):
-                qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
-

 if __name__ == '__main__':
     unittest.main()