未验证 提交 7094251b 编写于 作者: S Sylwester Fraczek 提交者: GitHub

dequantize matmul and matmul_v2 Y weights in quant2_int8 (#37618)

* dequantize matmul and matmul_v2 Y weights in quant2_int8

* review fix

* split conv and mul tests, add matmul test

* fixup

* fix ci build

* remove unused variables

* formatting fix

* remove extra newline at end of file
上级 6abe7dcb
......@@ -69,7 +69,7 @@ class Quant2Int8MkldnnPass(object):
self._mul_ops = ['mul']
self._fc_ops = ['fc']
self._relu_ops = ['relu', 'relu6']
self._matmul_ops = ['matmul']
self._matmul_ops = ['matmul', 'matmul_v2']
self._gru_ops = ['fusion_gru', 'multi_gru']
self._lstm_ops = ['fusion_lstm']
self._weight_thresholds = {}
......@@ -328,14 +328,18 @@ class Quant2Int8MkldnnPass(object):
def _dequantize_weights(self, graph):
    """Restore float weights for quantized conv / mul / matmul ops.

    Scans all op nodes in ``graph``; any conv op whose "Filter" input, or
    any mul/matmul op whose "Y" input, still holds integer-valued data is
    assumed to carry quantized weights and is handed to
    ``_dequantize_op_weights`` for rescaling back to floats.

    Returns the (mutated) graph.
    """

    def _is_int8_weights(op_node, weight_name):
        # A weight tensor is treated as quantized when it exists in the
        # scope and every element is integer-valued.
        weight_var_name = op_node.input(weight_name)[0]
        if self._scope.find_var(weight_var_name) is None:
            return False
        weight = self._load_param(self._scope, weight_var_name)
        return np.all(np.mod(weight, 1) == 0)

    # matmul/matmul_v2 store their weights in "Y", exactly like mul ops.
    mul_and_matmul_ops = self._mul_ops + self._matmul_ops
    for op in graph.all_op_nodes():
        if op.name() in self._conv_ops and _is_int8_weights(op, "Filter"):
            self._dequantize_op_weights(graph, op, "Filter", "Output")
        elif op.name() in mul_and_matmul_ops and _is_int8_weights(op, "Y"):
            self._dequantize_op_weights(graph, op, "Y", "Out")
    return graph
def _dequantize_op_weights(self, graph, op_node, weight_name, output_name):
......
......@@ -23,7 +23,93 @@ import paddle
paddle.enable_static()
class TestQuant2Int8MkldnnPass(unittest.TestCase):
class TestQuant2Int8MkldnnPassMul(unittest.TestCase):
def op_name(self):
return "mul"
def setUp(self):
self.scope = fluid.Scope()
self.place = fluid.CPUPlace()
self.dtype = np.float32
self.use_mkldnn = True
self.quantized_ops = self.op_name()
self.mul_input_size = [1, 3]
self.mul_weights_size = [3, 5]
self.mul_output_size = [1, 5]
self.mul_input = np.random.random(self.mul_input_size).astype(
self.dtype)
self.mul_weights = np.ones(self.mul_weights_size, self.dtype)
self.mul_weights_bad = np.ones([1, 1], self.dtype)
self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype)
self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype)
self.variables_mul = {
"mul_input": self.mul_input,
"mul_weights": self.mul_weights,
"mul_output": self.mul_output,
"mul_weights_bad": self.mul_weights_bad
}
def prepare_program_mul(self, program):
block = program.global_block()
for name in self.variables_mul:
block.create_var(
name=name,
dtype="float32",
shape=self.variables_mul[name].shape)
mul_op1 = block.append_op(
type=self.op_name(),
inputs={
"X": block.var('mul_input'),
"Y": block.var('mul_weights')
},
outputs={"Out": block.var('mul_output')},
attrs={'use_mkldnn': self.use_mkldnn})
def test_dequantize_op_weights(self):
program = fluid.Program()
with fluid.program_guard(program):
self.prepare_program_mul(program)
graph = IrGraph(core.Graph(program.desc), for_test=True)
op_node = ""
for op in graph.all_op_nodes():
if op.op().type() == self.op_name():
op_node = op
break
assert op_node != "", "op of type %s not found" % self.op_name()
qpass = Quant2Int8MkldnnPass(
self.quantized_ops,
_scope=self.scope,
_place=self.place,
_core=core,
_debug=False)
qpass._weight_thresholds["mul_output"] = self.mul_output_scale
param = self.scope.var("mul_weights").get_tensor()
param.set(self.variables_mul["mul_weights"], self.place)
qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
assert np.allclose(
self.scope.find_var("mul_weights").get_tensor(),
[[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]])
param = self.scope.var("mul_weights").get_tensor()
param.set(self.variables_mul["mul_weights_bad"], self.place)
with self.assertRaises(ValueError):
qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
class TestQuant2Int8MkldnnPassMatmulV2(TestQuant2Int8MkldnnPassMul):
    """Reruns the Mul dequantization suite against the matmul_v2 op."""

    def op_name(self):
        return "matmul_v2"
class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase):
def setUp(self):
self.scope = fluid.Scope()
self.place = fluid.CPUPlace()
......@@ -46,7 +132,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
self.conv_output = np.ndarray(self.conv_output_size).astype(self.dtype)
self.conv_output2 = np.ndarray(self.conv_output2_size).astype(
self.dtype)
self.quantized_ops = 'conv2d,mul'
self.quantized_ops = 'conv2d'
self.variables = {
"input": self.input,
"filter": self.filter,
......@@ -54,24 +140,8 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
"conv_output": self.conv_output,
"conv_output2": self.conv_output2,
}
self.mul_input_size = [1, 3]
self.mul_weights_size = [3, 5]
self.mul_output_size = [1, 5]
self.mul_input = np.random.random(self.mul_input_size).astype(
self.dtype)
self.mul_weights = np.ones(self.mul_weights_size, self.dtype)
self.mul_weights_bad = np.ones([1, 1], self.dtype)
self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype)
self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype)
self.variables_mul = {
"mul_input": self.mul_input,
"mul_weights": self.mul_weights,
"mul_output": self.mul_output,
"mul_weights_bad": self.mul_weights_bad
}
def prepare_program(self, program):
def prepare_program_conv2d(self, program):
block = program.global_block()
for name in self.variables:
block.create_var(
......@@ -111,23 +181,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
'fuse_brelu': True
})
def prepare_program_mul(self, program):
block = program.global_block()
for name in self.variables_mul:
block.create_var(
name=name,
dtype="float32",
shape=self.variables_mul[name].shape)
mul_op1 = block.append_op(
type="mul",
inputs={
"X": block.var('mul_input'),
"Y": block.var('mul_weights')
},
outputs={"Out": block.var('mul_output')},
attrs={'use_mkldnn': self.use_mkldnn})
def remove_fuse_activation_attribute(self, graph):
for op in graph.all_op_nodes():
op.op().remove_attr("fuse_activation")
......@@ -150,7 +203,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
def test_quant_update_activation(self):
program = fluid.Program()
with fluid.program_guard(program):
self.prepare_program(program)
self.prepare_program_conv2d(program)
graph = IrGraph(core.Graph(program.desc), for_test=True)
graph = self.remove_fuse_activation_attribute(graph)
self.check_graph_before_pass(graph)
......@@ -163,39 +216,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase):
graph = quant2_int8_mkldnn_pass._update_activations(graph)
self.check_graph_after_pass(graph)
def test_dequantize_op_weights(self):
program = fluid.Program()
with fluid.program_guard(program):
self.prepare_program_mul(program)
graph = IrGraph(core.Graph(program.desc), for_test=True)
for op in graph.all_op_nodes():
if op.op().type() == "mul":
op_node = op
break
qpass = Quant2Int8MkldnnPass(
self.quantized_ops,
_scope=self.scope,
_place=self.place,
_core=core,
_debug=False)
qpass._weight_thresholds["mul_output"] = self.mul_output_scale
param = self.scope.var("mul_weights").get_tensor()
param.set(self.variables_mul["mul_weights"], self.place)
qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
assert np.allclose(
self.scope.find_var("mul_weights").get_tensor(),
[[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.],
[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]])
param = self.scope.var("mul_weights").get_tensor()
param.set(self.variables_mul["mul_weights_bad"], self.place)
with self.assertRaises(ValueError):
qpass._dequantize_op_weights(graph, op_node, "Y", "Out")
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册