Update quantize_transpiler

e3c7348f · Dang Qingqing · 1d0353f1 · e3c7348f · e3c7348f · e3c7348f
3 changed file
--- a/python/paddle/fluid/tests/transpiler/CMakeLists.txt
+++ b/python/paddle/fluid/tests/transpiler/CMakeLists.txt
+file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
+string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
+
+foreach(src ${TEST_OPS})
+    py_test(${src} SRCS ${src}.py)
+endforeach()
--- a/python/paddle/fluid/tests/transpiler/test_quantize_transpiler.py
+++ b/python/paddle/fluid/tests/transpiler/test_quantize_transpiler.py
+#   copyright (c) 2018 paddlepaddle authors. all rights reserved.
+#
+# licensed under the apache license, version 2.0 (the "license");
+# you may not use this file except in compliance with the license.
+# you may obtain a copy of the license at
+#
+#     http://www.apache.org/licenses/license-2.0
+#
+# unless required by applicable law or agreed to in writing, software
+# distributed under the license is distributed on an "as is" basis,
+# without warranties or conditions of any kind, either express or implied.
+# see the license for the specific language governing permissions and
+# limitations under the license.
+
+import numpy as np
+import unittest
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid.transpiler.quantize_transpiler import _original_var_name
+
+
+def linear_fc(num):
+    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    hidden = data
+    for _ in xrange(num):
+        hidden = fluid.layers.fc(hidden, size=128, act='relu')
+    loss = fluid.layers.cross_entropy(input=hidden, label=label)
+    loss = fluid.layers.mean(loss)
+    return loss
+
+
+def residual_block(num):
+    def conv_bn_layer(input,
+                      ch_out,
+                      filter_size,
+                      stride,
+                      padding,
+                      act='relu',
+                      bias_attr=False):
+        tmp = fluid.layers.conv2d(
+            input=input,
+            filter_size=filter_size,
+            num_filters=ch_out,
+            stride=stride,
+            padding=padding,
+            act=None,
+            bias_attr=bias_attr)
+        return fluid.layers.batch_norm(input=tmp, act=act)
+
+    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    hidden = data
+    for _ in xrange(num):
+        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
+        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
+        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
+    fc = fluid.layers.fc(input=hidden, size=10)
+    loss = fluid.layers.cross_entropy(input=fc, label=label)
+    loss = fluid.layers.mean(loss)
+    return loss
+
+
+def conv_net(img, label):
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
+        input=img,
+        filter_size=5,
+        num_filters=20,
+        pool_size=2,
+        pool_stride=2,
+        act="relu")
+    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
+        input=conv_pool_1,
+        filter_size=5,
+        num_filters=50,
+        pool_size=2,
+        pool_stride=2,
+        act="relu")
+    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
+    loss = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_loss = fluid.layers.mean(loss)
+    return avg_loss
+
+
+class TestQuantizeTranspiler(unittest.TestCase):
+    def setUp(self):
+        # since quant_op and dequant_op is not ready, use cos and sin for test
+        self.weight_quant_op_type = 'fake_quantize_abs_max'
+        self.dequant_op_type = 'fake_dequantize_max_abs'
+        self.quantizable_op_and_inputs = {
+            'conv2d': ['Input', 'Filter'],
+            'depthwise_conv2d': ['Input', 'Filter'],
+            'mul': ['X', 'Y']
+        }
+        self.quantizable_op_grad_and_inputs = {
+            'conv2d_grad': ['Input', 'Filter'],
+            'depthwise_conv2d_grad': ['Input', 'Filter'],
+            'mul_grad': ['X', 'Y']
+        }
+
+    def check_program(self, program):
+        quantized_ops = {}
+
+        persistable_vars = [
+            v.name
+            for v in filter(lambda var: var.persistable, program.list_vars())
+        ]
+
+        for block in program.blocks:
+            for idx, op in enumerate(block.ops):
+                # check forward
+                if op.type in self.quantizable_op_and_inputs:
+                    for i, arg_name in enumerate(op.input_arg_names):
+                        quant_op_type = self.weight_quant_op_type if \
+                            _original_var_name(arg_name) \
+                            in persistable_vars else self.act_quant_op_type
+                        self.assertTrue(
+                            arg_name.endswith('.quantized.dequantized'))
+                        if arg_name not in quantized_ops:
+                            self.assertEqual(block.ops[idx - 2 * i - 1].type,
+                                             self.dequant_op_type)
+                            self.assertEqual(block.ops[idx - 2 * i - 2].type,
+                                             quant_op_type)
+                            quantized_ops[arg_name] = block.ops[idx - 2 * i - 2]
+                        else:
+                            op_idx = block.ops.index(quantized_ops[arg_name])
+                            self.assertLess(op_idx, idx)
+
+                # check backward
+                if op.type in self.quantizable_op_grad_and_inputs:
+                    for pname in self.quantizable_op_grad_and_inputs[op.type]:
+                        arg_name = op.input(pname)[0]
+                        self.assertTrue(
+                            arg_name.endswith('.quantized.dequantized'))
+                        self.assertTrue(arg_name in quantized_ops)
+
+    def linear_fc_quant(self, quant_type):
+        main = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(main, startup):
+            loss = linear_fc(3)
+            opt = fluid.optimizer.Adam(learning_rate=0.001)
+            opt.minimize(loss)
+            t = fluid.QuantizeTranspiler(activation_quantize_type=quant_type)
+            t.training_transpile(main)
+            self.check_program(main)
+
+    def test_linear_fc_quant_abs_max(self):
+        self.act_quant_op_type = 'fake_quantize_abs_max'
+        self.linear_fc_quant('abs_max')
+
+    def test_linear_fc_quant_range_abs_max(self):
+        self.act_quant_op_type = 'fake_quantize_range_abs_max'
+        self.linear_fc_quant('range_abs_max')
+
+    def residual_block_quant(self, quant_type):
+        main = fluid.Program()
+        startup = fluid.Program()
+        with fluid.program_guard(main, startup):
+            loss = residual_block(2)
+            opt = fluid.optimizer.Adam(learning_rate=0.001)
+            opt.minimize(loss)
+            t = fluid.QuantizeTranspiler(activation_quantize_type=quant_type)
+            t.training_transpile(main)
+            self.check_program(main)
+
+    def test_residual_block_abs_max(self):
+        self.act_quant_op_type = 'fake_quantize_abs_max'
+        self.residual_block_quant('abs_max')
+
+    def test_residual_block_range_abs_max(self):
+        self.act_quant_op_type = 'fake_quantize_range_abs_max'
+        self.residual_block_quant('range_abs_max')
+
+    def freeze_program(self, use_cuda):
+        main = fluid.Program()
+        startup = fluid.Program()
+        quant_transpiler = fluid.QuantizeTranspiler()
+        with fluid.program_guard(main, startup):
+            img = fluid.layers.data(
+                name='image', shape=[1, 28, 28], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            loss = conv_net(img, label)
+            opt = fluid.optimizer.Adam(learning_rate=0.001)
+            opt.minimize(loss)
+            quant_transpiler.training_transpile(main)
+
+        test_program = main.clone()
+        with fluid.program_guard(test_program):
+            test_program = fluid.io.get_inference_program(loss)
+
+        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+        exe = fluid.Executor(place)
+        iter = 5
+        batch_size = 8
+        class_num = 10
+        exe.run(startup)
+
+        train_reader = paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.mnist.train(), buf_size=500),
+            batch_size=batch_size)
+        test_reader = paddle.batch(
+            paddle.dataset.mnist.test(), batch_size=batch_size)
+        feeder = fluid.DataFeeder(feed_list=[img, label], place=place)
+
+        for _ in range(iter):
+            data = train_reader().next()
+            loss_v = exe.run(program=main,
+                             feed=feeder.feed(data),
+                             fetch_list=[loss])
+        test_data = test_reader().next()
+
+        f_var = fluid.framework.get_var('conv2d_1.tmp_0', test_program)
+        w_var = fluid.framework.get_var('conv2d_1.w_0.quantized', test_program)
+        # Testing during training
+        test_loss1, f_v1, w_quant = exe.run(program=test_program,
+                                            feed=feeder.feed(test_data),
+                                            fetch_list=[loss, f_var, w_var])
+
+        # Freeze program for inference, but the weight of fc/conv is still float type.
+        quant_transpiler.freeze_program(test_program, place)
+        fv2 = fluid.framework.get_var('conv2d_1.tmp_0.dequantized',
+                                      test_program)
+        test_loss2, f_v2 = exe.run(program=test_program,
+                                   feed=feeder.feed(test_data),
+                                   fetch_list=[loss, fv2])
+        self.assertAlmostEqual(test_loss1, test_loss2, delta=1e-5)
+        self.assertAlmostEqual(f_v1.all(), f_v2.all(), delta=1e-5)
+        w_freeze = np.array(fluid.global_scope().find_var('conv2d_1.w_0')
+                            .get_tensor())
+        self.assertEqual(np.sum(w_freeze), np.sum(w_quant))
+
+        # Convert parameter to 8-bit.
+        quant_transpiler.convert_to_int8(test_program, place)
+        # Save the 8-bit parameter and model file.
+        fluid.io.save_inference_model('model_8bit', ['image', 'label'], [loss],
+                                      exe, test_program)
+        # Test whether the 8-bit parameter and model file can be loaded successfully.
+        [infer, feed, fetch] = fluid.io.load_inference_model('model_8bit', exe)
+        # Check the loaded 8-bit weight.
+        w_8bit = np.array(fluid.global_scope().find_var('conv2d_1.w_0.int8')
+                          .get_tensor())
+
+        self.assertEqual(w_8bit.dtype, np.int8)
+        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
+
+    def test_freeze_program_cuda(self):
+        self.freeze_program(True)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/fluid/transpiler/quantize_transpiler.py
+++ b/python/paddle/fluid/transpiler/quantize_transpiler.py