#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
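
"""Unit tests for the fluid.contrib.slim quantization passes:
QuantizationTransformPass, QuantizationFreezePass, ConvertToInt8Pass and
TransformForMobilePass."""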

import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import six
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid import core

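# Restrict the tests to the first GPU and a single CPU device.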
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"


def linear_fc(num):
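    """Stack `num` ReLU FC layers on a 1x32x32 image input and return the
    mean cross-entropy loss against an int64 label."""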
    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        hidden = fluid.layers.fc(hidden, size=128, act='relu')
    loss = fluid.layers.cross_entropy(input=hidden, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def residual_block(num):
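    """Stack `num` residual units (conv+bn plus a 1x1 shortcut, joined by a
    ReLU elementwise_add), add a 10-way FC layer and return the mean
    cross-entropy loss."""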
    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=tmp, act=act)

    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
    fc = fluid.layers.fc(input=hidden, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def conv_net(img, label):
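    """LeNet-style network: two conv-pool blocks (the first followed by
    batch_norm) and a softmax FC classifier; returns the mean loss."""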
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")
    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = fluid.layers.mean(loss)
    return avg_loss


class TestQuantizationTransformPass(unittest.TestCase):
    def setUp(self):
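        # Forward ops the transform pass should quantize, mapped to the input
        # slots that are expected to receive quantized/dequantized variables.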
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y']
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y']
        }

    def check_program(self, transform_pass, program):
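        """Verify that every quantizable forward op and its gradient op read
        inputs renamed with the '.quantized.dequantized' suffix, i.e. that
        the transform pass rewrote all of their input variables."""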
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(self, quant_type, for_ci=False):
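        """Apply QuantizationTransformPass to the FC network and check the
        rewritten program; with for_ci=False, the transformed graphs are also
        drawn to DOT files for manual inspection."""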
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes)

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', for_ci=True)

    def test_linear_fc_quant_range_abs_max(self):
        self.linear_fc_quant('range_abs_max', for_ci=True)

    def test_linear_fc_quant_moving_average_abs_max(self):
        self.linear_fc_quant('moving_average_abs_max', for_ci=True)

    def residual_block_quant(self, quant_type, for_ci=False):
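        """Same check as linear_fc_quant, applied to the residual-block
        network."""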
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes)

    def test_residual_block_abs_max(self):
        self.residual_block_quant('abs_max', for_ci=True)

    def test_residual_block_range_abs_max(self):
        self.residual_block_quant('range_abs_max', for_ci=True)

    def test_residual_block_moving_average_abs_max(self):
        self.residual_block_quant('moving_average_abs_max', for_ci=True)


class TestQuantizationFreezePass(unittest.TestCase):
    def freeze_graph(self, use_cuda, seed, quant_type, for_ci=False):
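        """Run the full pipeline on the MNIST conv net: quantization-aware
        training, freezing the graph for inference, converting weights to
        int8 and transforming for mobile, saving an inference model after
        each stage."""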
        def build_program(main, startup, is_test):
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
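        # Insert fake quantize/dequantize ops into the train and test graphs;
        # weights are quantized channel-wise ('channel_wise_abs_max').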
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=quant_type,
            weight_quantize_type='channel_wise_abs_max')
        # Alternatively, exercise the default 'abs_max' weight quantization:
        # transform_pass = QuantizationTransformPass(
        #     scope=scope, place=place, activation_quantize_type=quant_type)
        transform_pass.apply(main_graph)
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        if not for_ci:
            marked_nodes = set()
            for op in main_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)

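        # Compile the training graph; memory optimization and inplace reuse
        # are disabled, since they can interfere with the variables inserted
        # by the quantization pass.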
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        with fluid.scope_guard(scope):
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                if not for_ci:
                    print('{}: {}'.format('loss' + dev_name + quant_type,
                                          loss_v))

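        # Run the fake-quantized test program once, recording the loss and
        # one quantized conv weight as references for the frozen graph.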
        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Testing
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(program=quantized_test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

        # Freeze the graph for inference; the fc/conv weights are still
        # stored as floats at this point.
        freeze_pass = QuantizationFreezePass(
            scope=scope,
            place=place,
            weight_quantize_type='channel_wise_abs_max')
        # Alternatively, freeze with the default 'abs_max' weight quantization:
        # freeze_pass = QuantizationFreezePass(scope=scope, place=place)
        freeze_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
                            marked_nodes)

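        # The frozen inference graph should reproduce the fake-quant test
        # loss to within the given delta.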
        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        if not for_ci:
            print('{}: {}'.format('test_loss1' + dev_name + quant_type,
                                  test_loss1))
            print('{}: {}'.format('test_loss2' + dev_name + quant_type,
                                  test_loss2))
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # This check may fail because of limited floating-point precision,
        # so it is disabled:
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
        if not for_ci:
            print('{}: {}'.format('w_freeze' + dev_name + quant_type,
                                  np.sum(w_freeze)))
            print('{}: {}'.format('w_quant' + dev_name + quant_type,
                                  np.sum(w_quant)))

        # Convert the weight parameters to 8-bit integers.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_int8' + dev_name + quant_type,
                            marked_nodes)
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameters and the model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('server_int8' + dev_name + quant_type,
                                          ['image', 'label'], [loss], exe,
                                          server_program_int8)
            # Check that the 8-bit parameters and model file load back cleanly.
            [infer, feed, fetch] = fluid.io.load_inference_model(
                'server_int8' + dev_name + quant_type, exe)
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        if not for_ci:
            print('{}: {}'.format('w_8bit' + dev_name + quant_type,
                                  np.sum(w_8bit)))
            print('{}: {}'.format('w_freeze' + dev_name + quant_type,
                                  np.sum(w_freeze)))

        # Rewrite the frozen graph for mobile (paddle-mobile) execution.
        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_mobile' + dev_name + quant_type,
                            marked_nodes)

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('mobile_int8' + dev_name + quant_type,
                                          ['image', 'label'], [loss], exe,
                                          mobile_program)

    def test_freeze_graph_cuda_dynamic(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True, seed=1, quant_type='abs_max', for_ci=False)

    def test_freeze_graph_cpu_dynamic(self):
        with fluid.unique_name.guard():
            self.freeze_graph(False, seed=2, quant_type='abs_max', for_ci=False)

    def test_freeze_graph_cuda_static(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True, seed=1, quant_type='range_abs_max', for_ci=False)
                self.freeze_graph(
                    True,
                    seed=1,
                    quant_type='moving_average_abs_max',
                    for_ci=False)

    def test_freeze_graph_cpu_static(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False, seed=2, quant_type='range_abs_max', for_ci=False)
            self.freeze_graph(
                False,
                seed=2,
                quant_type='moving_average_abs_max',
                for_ci=False)


if __name__ == '__main__':
    unittest.main()