From 7094251ba12387a2f7bb572b896c52c9c61420bf Mon Sep 17 00:00:00 2001 From: Sylwester Fraczek Date: Wed, 1 Dec 2021 16:10:58 +0100 Subject: [PATCH] dequantize matmul and matmul_v2 Y weights in quant2_int8 (#37618) * dequantize matmul and matmul_v2 Y weights in qat2_int8 * review fix * split conv and mul tests, add matmul test * fixup * fix ci build * remove unused variables * formatting fix * remove extra newline at end of file --- .../quantization/quant2_int8_mkldnn_pass.py | 8 +- .../tests/test_quant2_int8_mkldnn_pass.py | 160 ++++++++++-------- 2 files changed, 96 insertions(+), 72 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py index bc97e5cf6c9..4c9c4058318 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py @@ -69,7 +69,7 @@ class Quant2Int8MkldnnPass(object): self._mul_ops = ['mul'] self._fc_ops = ['fc'] self._relu_ops = ['relu', 'relu6'] - self._matmul_ops = ['matmul'] + self._matmul_ops = ['matmul', 'matmul_v2'] self._gru_ops = ['fusion_gru', 'multi_gru'] self._lstm_ops = ['fusion_lstm'] self._weight_thresholds = {} @@ -328,14 +328,18 @@ class Quant2Int8MkldnnPass(object): def _dequantize_weights(self, graph): def _is_int8_weights(op_node, weight_name): weight_var_name = op_node.input(weight_name)[0] + if self._scope.find_var(weight_var_name) is None: + return False weight = self._load_param(self._scope, weight_var_name) return np.all(np.mod(weight, 1) == 0) + mul_and_matmul_ops = self._mul_ops + self._matmul_ops for op in graph.all_op_nodes(): if op.name() in self._conv_ops and _is_int8_weights(op, "Filter"): self._dequantize_op_weights(graph, op, "Filter", "Output") - elif op.name() in self._mul_ops and _is_int8_weights(op, "Y"): + elif op.name() in mul_and_matmul_ops and _is_int8_weights(op, "Y"): self._dequantize_op_weights(graph, op, "Y", "Out") + return graph def _dequantize_op_weights(self, graph, op_node, weight_name, output_name): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py index 9ba0164afbe..994f89ab3e9 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py @@ -23,7 +23,93 @@ import paddle paddle.enable_static() -class TestQuant2Int8MkldnnPass(unittest.TestCase): +class TestQuant2Int8MkldnnPassMul(unittest.TestCase): + def op_name(self): + return "mul" + + def setUp(self): + self.scope = fluid.Scope() + self.place = fluid.CPUPlace() + self.dtype = np.float32 + self.use_mkldnn = True + + self.quantized_ops = self.op_name() + self.mul_input_size = [1, 3] + self.mul_weights_size = [3, 5] + self.mul_output_size = [1, 5] + self.mul_input = np.random.random(self.mul_input_size).astype( + self.dtype) + self.mul_weights = np.ones(self.mul_weights_size, self.dtype) + self.mul_weights_bad = np.ones([1, 1], self.dtype) + self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype) + self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype) + + self.variables_mul = { + "mul_input": self.mul_input, + "mul_weights": self.mul_weights, + "mul_output": self.mul_output, + "mul_weights_bad": self.mul_weights_bad + } + + def prepare_program_mul(self, program): + block = program.global_block() + for name in 
self.variables_mul: + block.create_var( + name=name, + dtype="float32", + shape=self.variables_mul[name].shape) + + mul_op1 = block.append_op( + type=self.op_name(), + inputs={ + "X": block.var('mul_input'), + "Y": block.var('mul_weights') + }, + outputs={"Out": block.var('mul_output')}, + attrs={'use_mkldnn': self.use_mkldnn}) + + def test_dequantize_op_weights(self): + program = fluid.Program() + with fluid.program_guard(program): + self.prepare_program_mul(program) + graph = IrGraph(core.Graph(program.desc), for_test=True) + + op_node = "" + for op in graph.all_op_nodes(): + if op.op().type() == self.op_name(): + op_node = op + break + assert op_node != "", "op of type %s not found" % self.op_name() + + qpass = Quant2Int8MkldnnPass( + self.quantized_ops, + _scope=self.scope, + _place=self.place, + _core=core, + _debug=False) + qpass._weight_thresholds["mul_output"] = self.mul_output_scale + param = self.scope.var("mul_weights").get_tensor() + param.set(self.variables_mul["mul_weights"], self.place) + qpass._dequantize_op_weights(graph, op_node, "Y", "Out") + + assert np.allclose( + self.scope.find_var("mul_weights").get_tensor(), + [[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], + [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], + [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]]) + + param = self.scope.var("mul_weights").get_tensor() + param.set(self.variables_mul["mul_weights_bad"], self.place) + with self.assertRaises(ValueError): + qpass._dequantize_op_weights(graph, op_node, "Y", "Out") + + +class TestQuant2Int8MkldnnPassMatmulV2(TestQuant2Int8MkldnnPassMul): + def op_name(self): + return "matmul_v2" + + +class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase): def setUp(self): self.scope = fluid.Scope() self.place = fluid.CPUPlace() @@ -46,7 +132,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): self.conv_output = np.ndarray(self.conv_output_size).astype(self.dtype) self.conv_output2 = np.ndarray(self.conv_output2_size).astype( self.dtype) - self.quantized_ops = 'conv2d,mul' + self.quantized_ops = 'conv2d' self.variables = { "input": self.input, "filter": self.filter, @@ -54,24 +140,8 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): "conv_output": self.conv_output, "conv_output2": self.conv_output2, } - self.mul_input_size = [1, 3] - self.mul_weights_size = [3, 5] - self.mul_output_size = [1, 5] - self.mul_input = np.random.random(self.mul_input_size).astype( - self.dtype) - self.mul_weights = np.ones(self.mul_weights_size, self.dtype) - self.mul_weights_bad = np.ones([1, 1], self.dtype) - self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype) - self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype) - self.variables_mul = { - "mul_input": self.mul_input, - "mul_weights": self.mul_weights, - "mul_output": self.mul_output, - "mul_weights_bad": self.mul_weights_bad - } - - def prepare_program(self, program): + def prepare_program_conv2d(self, program): block = program.global_block() for name in self.variables: block.create_var( @@ -111,23 +181,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): 'fuse_brelu': True }) - def prepare_program_mul(self, program): - block = program.global_block() - for name in self.variables_mul: - block.create_var( - name=name, - dtype="float32", - shape=self.variables_mul[name].shape) - - mul_op1 = block.append_op( - type="mul", - inputs={ - "X": block.var('mul_input'), - "Y": block.var('mul_weights') - }, - outputs={"Out": block.var('mul_output')}, - attrs={'use_mkldnn': 
self.use_mkldnn}) - def remove_fuse_activation_attribute(self, graph): for op in graph.all_op_nodes(): op.op().remove_attr("fuse_activation") @@ -150,7 +203,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): def test_quant_update_activation(self): program = fluid.Program() with fluid.program_guard(program): - self.prepare_program(program) + self.prepare_program_conv2d(program) graph = IrGraph(core.Graph(program.desc), for_test=True) graph = self.remove_fuse_activation_attribute(graph) self.check_graph_before_pass(graph) @@ -163,39 +216,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): graph = quant2_int8_mkldnn_pass._update_activations(graph) self.check_graph_after_pass(graph) - def test_dequantize_op_weights(self): - program = fluid.Program() - with fluid.program_guard(program): - self.prepare_program_mul(program) - graph = IrGraph(core.Graph(program.desc), for_test=True) - - for op in graph.all_op_nodes(): - if op.op().type() == "mul": - op_node = op - break - - qpass = Quant2Int8MkldnnPass( - self.quantized_ops, - _scope=self.scope, - _place=self.place, - _core=core, - _debug=False) - qpass._weight_thresholds["mul_output"] = self.mul_output_scale - param = self.scope.var("mul_weights").get_tensor() - param.set(self.variables_mul["mul_weights"], self.place) - qpass._dequantize_op_weights(graph, op_node, "Y", "Out") - - assert np.allclose( - self.scope.find_var("mul_weights").get_tensor(), - [[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], - [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], - [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]]) - - param = self.scope.var("mul_weights").get_tensor() - param.set(self.variables_mul["mul_weights_bad"], self.place) - with self.assertRaises(ValueError): - qpass._dequantize_op_weights(graph, op_node, "Y", "Out") - if __name__ == '__main__': unittest.main() -- GitLab
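
Note on the dequantization arithmetic: the new test asserts that a ones weight of shape [3, 5] with per-output-channel thresholds linspace(1, 5) is restored to columns of scale_j / 127, and that a ValueError is raised when the number of scales does not match the weight's output dimension (the mul_weights_bad case). The following is a minimal NumPy sketch of only that arithmetic; dequantize_y_weights and max_range are hypothetical names for illustration and are not the actual API of Quant2Int8MkldnnPass.

import numpy as np

def dequantize_y_weights(weights_int8, output_scales, max_range=127.0):
    # Hypothetical helper: scale each output channel (column) of an int8-valued
    # "Y" weight by its threshold over 127, mirroring the values checked in
    # test_dequantize_op_weights above.
    weights_int8 = np.asarray(weights_int8, dtype=np.float32)
    output_scales = np.asarray(output_scales, dtype=np.float32)
    if weights_int8.shape[-1] != output_scales.shape[0]:
        # Mirrors the error path exercised by mul_weights_bad ([1, 1] weights
        # versus 5 scales): mismatched shapes are rejected.
        raise ValueError("number of scales does not match weight output dimension")
    return weights_int8 * output_scales / max_range

# Reproduces the expected tensor from the test:
w = np.ones((3, 5), dtype=np.float32)
scales = np.linspace(1, 5, num=5).astype(np.float32)
print(dequantize_y_weights(w, scales))  # each row: [1/127, 2/127, 3/127, 4/127, 5/127]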