From 7094251ba12387a2f7bb572b896c52c9c61420bf Mon Sep 17 00:00:00 2001 From: Sylwester Fraczek Date: Wed, 1 Dec 2021 16:10:58 +0100 Subject: [PATCH] dequantize matmul and matmul_v2 Y weights in quant2_int8 (#37618) * dequantize matmul and matmul_v2 Y weights in qat2_int8 * review fix * split conv and mul tests, add matmul test * fixup * fix ci build * remove unused variables * formatting fix * remove extra newline at end of file --- .../quantization/quant2_int8_mkldnn_pass.py | 8 +- .../tests/test_quant2_int8_mkldnn_pass.py | 160 ++++++++++-------- 2 files changed, 96 insertions(+), 72 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py index bc97e5cf6c9..4c9c4058318 100644 --- a/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/quantization/quant2_int8_mkldnn_pass.py @@ -69,7 +69,7 @@ class Quant2Int8MkldnnPass(object): self._mul_ops = ['mul'] self._fc_ops = ['fc'] self._relu_ops = ['relu', 'relu6'] - self._matmul_ops = ['matmul'] + self._matmul_ops = ['matmul', 'matmul_v2'] self._gru_ops = ['fusion_gru', 'multi_gru'] self._lstm_ops = ['fusion_lstm'] self._weight_thresholds = {} @@ -328,14 +328,18 @@ class Quant2Int8MkldnnPass(object): def _dequantize_weights(self, graph): def _is_int8_weights(op_node, weight_name): weight_var_name = op_node.input(weight_name)[0] + if self._scope.find_var(weight_var_name) is None: + return False weight = self._load_param(self._scope, weight_var_name) return np.all(np.mod(weight, 1) == 0) + mul_and_matmul_ops = self._mul_ops + self._matmul_ops for op in graph.all_op_nodes(): if op.name() in self._conv_ops and _is_int8_weights(op, "Filter"): self._dequantize_op_weights(graph, op, "Filter", "Output") - elif op.name() in self._mul_ops and _is_int8_weights(op, "Y"): + elif op.name() in mul_and_matmul_ops and _is_int8_weights(op, "Y"): self._dequantize_op_weights(graph, op, "Y", "Out") + return graph def _dequantize_op_weights(self, graph, op_node, weight_name, output_name): diff --git a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py index 9ba0164afbe..994f89ab3e9 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py +++ b/python/paddle/fluid/contrib/slim/tests/test_quant2_int8_mkldnn_pass.py @@ -23,7 +23,93 @@ import paddle paddle.enable_static() -class TestQuant2Int8MkldnnPass(unittest.TestCase): +class TestQuant2Int8MkldnnPassMul(unittest.TestCase): + def op_name(self): + return "mul" + + def setUp(self): + self.scope = fluid.Scope() + self.place = fluid.CPUPlace() + self.dtype = np.float32 + self.use_mkldnn = True + + self.quantized_ops = self.op_name() + self.mul_input_size = [1, 3] + self.mul_weights_size = [3, 5] + self.mul_output_size = [1, 5] + self.mul_input = np.random.random(self.mul_input_size).astype( + self.dtype) + self.mul_weights = np.ones(self.mul_weights_size, self.dtype) + self.mul_weights_bad = np.ones([1, 1], self.dtype) + self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype) + self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype) + + self.variables_mul = { + "mul_input": self.mul_input, + "mul_weights": self.mul_weights, + "mul_output": self.mul_output, + "mul_weights_bad": self.mul_weights_bad + } + + def prepare_program_mul(self, program): + block = program.global_block() + for name in 
self.variables_mul: + block.create_var( + name=name, + dtype="float32", + shape=self.variables_mul[name].shape) + + mul_op1 = block.append_op( + type=self.op_name(), + inputs={ + "X": block.var('mul_input'), + "Y": block.var('mul_weights') + }, + outputs={"Out": block.var('mul_output')}, + attrs={'use_mkldnn': self.use_mkldnn}) + + def test_dequantize_op_weights(self): + program = fluid.Program() + with fluid.program_guard(program): + self.prepare_program_mul(program) + graph = IrGraph(core.Graph(program.desc), for_test=True) + + op_node = "" + for op in graph.all_op_nodes(): + if op.op().type() == self.op_name(): + op_node = op + break + assert op_node != "", "op of type %s not found" % self.op_name() + + qpass = Quant2Int8MkldnnPass( + self.quantized_ops, + _scope=self.scope, + _place=self.place, + _core=core, + _debug=False) + qpass._weight_thresholds["mul_output"] = self.mul_output_scale + param = self.scope.var("mul_weights").get_tensor() + param.set(self.variables_mul["mul_weights"], self.place) + qpass._dequantize_op_weights(graph, op_node, "Y", "Out") + + assert np.allclose( + self.scope.find_var("mul_weights").get_tensor(), + [[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], + [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], + [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]]) + + param = self.scope.var("mul_weights").get_tensor() + param.set(self.variables_mul["mul_weights_bad"], self.place) + with self.assertRaises(ValueError): + qpass._dequantize_op_weights(graph, op_node, "Y", "Out") + + +class TestQuant2Int8MkldnnPassMatmulV2(TestQuant2Int8MkldnnPassMul): + def op_name(self): + return "matmul_v2" + + +class TestQuant2Int8MkldnnPassConv2D(unittest.TestCase): def setUp(self): self.scope = fluid.Scope() self.place = fluid.CPUPlace() @@ -46,7 +132,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): self.conv_output = np.ndarray(self.conv_output_size).astype(self.dtype) self.conv_output2 = np.ndarray(self.conv_output2_size).astype( self.dtype) - self.quantized_ops = 'conv2d,mul' + self.quantized_ops = 'conv2d' self.variables = { "input": self.input, "filter": self.filter, @@ -54,24 +140,8 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): "conv_output": self.conv_output, "conv_output2": self.conv_output2, } - self.mul_input_size = [1, 3] - self.mul_weights_size = [3, 5] - self.mul_output_size = [1, 5] - self.mul_input = np.random.random(self.mul_input_size).astype( - self.dtype) - self.mul_weights = np.ones(self.mul_weights_size, self.dtype) - self.mul_weights_bad = np.ones([1, 1], self.dtype) - self.mul_output = np.ndarray(self.mul_output_size).astype(self.dtype) - self.mul_output_scale = np.linspace(1, 5, num=5).astype(self.dtype) - self.variables_mul = { - "mul_input": self.mul_input, - "mul_weights": self.mul_weights, - "mul_output": self.mul_output, - "mul_weights_bad": self.mul_weights_bad - } - - def prepare_program(self, program): + def prepare_program_conv2d(self, program): block = program.global_block() for name in self.variables: block.create_var( @@ -111,23 +181,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): 'fuse_brelu': True }) - def prepare_program_mul(self, program): - block = program.global_block() - for name in self.variables_mul: - block.create_var( - name=name, - dtype="float32", - shape=self.variables_mul[name].shape) - - mul_op1 = block.append_op( - type="mul", - inputs={ - "X": block.var('mul_input'), - "Y": block.var('mul_weights') - }, - outputs={"Out": block.var('mul_output')}, - attrs={'use_mkldnn': 
self.use_mkldnn}) - def remove_fuse_activation_attribute(self, graph): for op in graph.all_op_nodes(): op.op().remove_attr("fuse_activation") @@ -150,7 +203,7 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): def test_quant_update_activation(self): program = fluid.Program() with fluid.program_guard(program): - self.prepare_program(program) + self.prepare_program_conv2d(program) graph = IrGraph(core.Graph(program.desc), for_test=True) graph = self.remove_fuse_activation_attribute(graph) self.check_graph_before_pass(graph) @@ -163,39 +216,6 @@ class TestQuant2Int8MkldnnPass(unittest.TestCase): graph = quant2_int8_mkldnn_pass._update_activations(graph) self.check_graph_after_pass(graph) - def test_dequantize_op_weights(self): - program = fluid.Program() - with fluid.program_guard(program): - self.prepare_program_mul(program) - graph = IrGraph(core.Graph(program.desc), for_test=True) - - for op in graph.all_op_nodes(): - if op.op().type() == "mul": - op_node = op - break - - qpass = Quant2Int8MkldnnPass( - self.quantized_ops, - _scope=self.scope, - _place=self.place, - _core=core, - _debug=False) - qpass._weight_thresholds["mul_output"] = self.mul_output_scale - param = self.scope.var("mul_weights").get_tensor() - param.set(self.variables_mul["mul_weights"], self.place) - qpass._dequantize_op_weights(graph, op_node, "Y", "Out") - - assert np.allclose( - self.scope.find_var("mul_weights").get_tensor(), - [[1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], - [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.], - [1. / 127., 2. / 127., 3. / 127., 4. / 127., 5. / 127.]]) - - param = self.scope.var("mul_weights").get_tensor() - param.set(self.variables_mul["mul_weights_bad"], self.place) - with self.assertRaises(ValueError): - qpass._dequantize_op_weights(graph, op_node, "Y", "Out") - if __name__ == '__main__': unittest.main() -- GitLab
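
Note on the dequantization arithmetic: the new test asserts that a ones weight of shape [3, 5] with per-output-channel thresholds linspace(1, 5) is restored to columns of scale_j / 127, and that a ValueError is raised when the number of scales does not match the weight's output dimension (the mul_weights_bad case). The following is a minimal NumPy sketch of only that arithmetic; dequantize_y_weights and max_range are hypothetical names for illustration and are not the actual API of Quant2Int8MkldnnPass.

import numpy as np

def dequantize_y_weights(weights_int8, output_scales, max_range=127.0):
    # Hypothetical helper: scale each output channel (column) of an int8-valued
    # "Y" weight by its threshold over 127, mirroring the values checked in
    # test_dequantize_op_weights above.
    weights_int8 = np.asarray(weights_int8, dtype=np.float32)
    output_scales = np.asarray(output_scales, dtype=np.float32)
    if weights_int8.shape[-1] != output_scales.shape[0]:
        # Mirrors the error path exercised by mul_weights_bad ([1, 1] weights
        # versus 5 scales): mismatched shapes are rejected.
        raise ValueError("number of scales does not match weight output dimension")
    return weights_int8 * output_scales / max_range

# Reproduces the expected tensor from the test:
w = np.ones((3, 5), dtype=np.float32)
scales = np.linspace(1, 5, num=5).astype(np.float32)
print(dequantize_y_weights(w, scales))  # each row: [1/127, 2/127, 3/127, 4/127, 5/127]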