#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPassV2
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass
from paddle.fluid import core

paddle.enable_static()

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"


def linear_fc(num):
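    """Build a stack of `num` fully-connected layers (size 128, relu) on a
    1x32x32 image input and return the mean cross-entropy loss."""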
    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in range(num):
        hidden = fluid.layers.fc(hidden, size=128, act='relu')
    loss = fluid.layers.cross_entropy(input=hidden, label=label)
    loss = paddle.mean(loss)
    return loss


def residual_block(num, quant_skip_pattern=None):
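    """Build `num` conv+bn residual units, a matmul with a created parameter,
    an average pooling layer (optionally placed under `quant_skip_pattern` so
    it is skipped by quantization), and a final fc layer; return the mean
    cross-entropy loss."""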
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return fluid.layers.batch_norm(input=tmp, act=act)

    data = fluid.layers.data(
        name='image',
        shape=[1, 1, 32, 32],
        dtype='float32',
        append_batch_size=False,
    )
    label = fluid.layers.data(
        name='label', shape=[1, 1], dtype='int64', append_batch_size=False
    )
    hidden = data
    for _ in range(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short))
    matmul_weight = paddle.create_parameter(
        shape=[1, 16, 32, 32], dtype='float32'
    )
    hidden = fluid.layers.matmul(hidden, matmul_weight, True, True)
    if quant_skip_pattern:
        with fluid.name_scope(quant_skip_pattern):
            pool = fluid.layers.pool2d(
                input=hidden, pool_size=2, pool_type='avg', pool_stride=2
            )
    else:
        pool = fluid.layers.pool2d(
            input=hidden, pool_size=2, pool_type='avg', pool_stride=2
        )
    fc = fluid.layers.fc(input=pool, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = paddle.mean(loss)
    return loss


def conv_net(img, label, quant_skip_pattern):
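    """Build two conv-pool blocks with batch norm and two fc layers; the final
    softmax fc is placed under `quant_skip_pattern` so it is skipped by the
    quantization passes. Return the mean cross-entropy loss."""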
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        pool_type='max',
        act="relu",
    )
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        pool_type='avg',
        act="relu",
    )
    hidden = fluid.layers.fc(input=conv_pool_2, size=100, act='relu')
    with fluid.name_scope(quant_skip_pattern):
        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = paddle.mean(loss)
    return avg_loss


class TestQuantizationTransformPass(unittest.TestCase):
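    """Check that QuantizationTransformPass rewrites the inputs of quantizable
    ops (conv2d, depthwise_conv2d, mul) with fake quantize/dequantize variables."""
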
    def setUp(self):
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y'],
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y'],
        }

    def check_program(self, program):
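        """Assert that every input of the quantizable forward ops and of their
        grad ops has been replaced by a `.quantized.dequantized` variable."""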
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized')
                        )
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized')
                        )
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(
        self, activation_quant_type, weight_quantize_type, for_ci=True
    ):
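        """Apply QuantizationTransformPass to the linear FC network and verify
        the transformed program."""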
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
        )
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw(
                '.', 'quantize_fc_' + activation_quant_type, marked_nodes
            )
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw(
                '.', 'val_fc_' + activation_quant_type, val_marked_nodes
            )

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', 'abs_max', for_ci=True)

    def test_linear_fc_quant_range_abs_max(self):
        self.linear_fc_quant('range_abs_max', 'abs_max', for_ci=True)

    def test_linear_fc_quant_moving_average_abs_max(self):
        self.linear_fc_quant(
            'moving_average_abs_max', 'channel_wise_abs_max', for_ci=True
        )

    def residual_block_quant(
        self,
        activation_quant_type,
        weight_quantize_type,
        quantizable_op_type,
        for_ci=True,
    ):
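        """Apply QuantizationTransformPass to the residual-block network with a
        configurable `quantizable_op_type` list and verify the transformed
        program."""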
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=quantizable_op_type,
        )
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw(
                '.', 'quantize_residual_' + activation_quant_type, marked_nodes
            )
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw(
                '.', 'val_residual_' + activation_quant_type, val_marked_nodes
            )

    def test_residual_block_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant(
            'abs_max', 'abs_max', quantizable_op_type, for_ci=True
        )

    def test_residual_block_range_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant(
            'range_abs_max', 'abs_max', quantizable_op_type, for_ci=True
        )

    def test_residual_block_moving_average_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant(
            'moving_average_abs_max',
            'channel_wise_abs_max',
            quantizable_op_type,
            for_ci=True,
        )


class TestQuantizationFreezePass(unittest.TestCase):
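    """End-to-end check of the quantization passes: train a quantized conv net,
    freeze it, convert the weights to int8, and transform it for mobile."""
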
    def freeze_graph(
        self,
        use_cuda,
        seed,
        activation_quant_type,
        bias_correction=False,
        weight_quant_type='abs_max',
        for_ci=True,
        quant_skip_pattern='skip_quant',
    ):
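        """Train the conv net with fake quantization, then apply
        QuantizationFreezePass, ConvertToInt8Pass and TransformForMobilePass to
        the test graph, checking losses and the saved int8 weights on the way."""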
        def build_program(main, startup, is_test):
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32'
                    )
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64'
                    )
                    loss = conv_net(img, label, quant_skip_pattern)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type,
            skip_pattern=quant_skip_pattern,
        )
        transform_pass.apply(main_graph)
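        # Build a second pass with the same configuration for the test graph.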
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type,
            skip_pattern=quant_skip_pattern,
        )
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        if not for_ci:
            marked_nodes = set()
            for op in main_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            main_graph.draw(
                '.',
                'main'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                marked_nodes,
            )
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.',
                'test'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                marked_nodes,
            )

        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy
        )
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(
            paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size,
        )
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size
        )
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
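        # Train the quantization-aware program for a few mini-batches.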
        with fluid.scope_guard(scope):
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(
                    binary, feed=feeder.feed(data), fetch_list=[loss]
                )
                if not for_ci:
                    print(
                        '{}: {}'.format(
                            'loss'
                            + dev_name
                            + activation_quant_type
                            + '_'
                            + weight_quant_type,
                            loss_v,
                        )
                    )

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var(
                'conv2d_1.w_0.quantized', quantized_test_program
            )
        # Testing
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(
                program=quantized_test_program,
                feed=feeder.feed(test_data),
                fetch_list=[loss, w_var],
            )

        # Freeze the graph for inference, but the weights of fc/conv are still in float type.
        freeze_pass = QuantizationFreezePass(
            scope=scope,
            place=place,
            bias_correction=bias_correction,
            weight_quantize_type=weight_quant_type,
        )
        freeze_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.',
                'test_freeze'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                marked_nodes,
            )

        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            (test_loss2,) = exe.run(
                program=server_program,
                feed=feeder.feed(test_data),
                fetch_list=[loss],
            )
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        if not for_ci:
            print(
                '{}: {}'.format(
                    'test_loss1'
                    + dev_name
                    + activation_quant_type
                    + '_'
                    + weight_quant_type,
                    test_loss1,
                )
            )
            print(
                '{}: {}'.format(
                    'test_loss2'
                    + dev_name
                    + activation_quant_type
                    + '_'
                    + weight_quant_type,
                    test_loss2,
                )
            )
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # This check may fail due to limited calculation precision.
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
        if not for_ci:
            print(
                '{}: {}'.format(
                    'w_freeze'
                    + dev_name
                    + activation_quant_type
                    + '_'
                    + weight_quant_type,
                    np.sum(w_freeze),
                )
            )
            print(
                '{}: {}'.format(
                    'w_quant'
                    + dev_name
                    + activation_quant_type
                    + '_'
                    + weight_quant_type,
                    np.sum(w_quant),
                )
            )

        # Convert parameter to 8-bit.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.',
                'test_int8'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                marked_nodes,
            )
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameter and model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'server_int8'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                ['image', 'label'],
                [loss],
                exe,
                server_program_int8,
            )
            # Test whether the 8-bit parameter and model file can be loaded successfully.
            [infer, feed, fetch] = fluid.io.load_inference_model(
                'server_int8'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                exe,
            )
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        if not for_ci:
            print(
                '{}: {}'.format(
                    'w_8bit'
                    + dev_name
                    + activation_quant_type
                    + '_'
                    + weight_quant_type,
                    np.sum(w_8bit),
                )
            )
            print(
                '{}: {}'.format(
                    'w_freeze'
                    + dev_name
                    + activation_quant_type
                    + '_'
                    + weight_quant_type,
                    np.sum(w_freeze),
                )
            )

        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.',
                'test_mobile'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                marked_nodes,
            )

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'mobile_int8'
                + dev_name
                + activation_quant_type
                + '_'
                + weight_quant_type,
                ['image', 'label'],
                [loss],
                exe,
                mobile_program,
            )

    def test_freeze_graph_cuda_dynamic(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True,
                )
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True,
                )

    def test_freeze_graph_cpu_dynamic(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='abs_max',
                weight_quant_type='abs_max',
                for_ci=True,
            )
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True,
            )

    def test_freeze_graph_cuda_static(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='range_abs_max',
                    bias_correction=True,
                    weight_quant_type='abs_max',
                    for_ci=True,
                )
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='range_abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True,
                )
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True,
                )
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='range_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True,
                )
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True,
                )
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    bias_correction=True,
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True,
                )

    def test_freeze_graph_cpu_static(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='range_abs_max',
                weight_quant_type='abs_max',
                for_ci=True,
            )
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='abs_max',
                for_ci=True,
            )
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='range_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True,
            )
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True,
            )


def quant_dequant_residual_block(num, quant_skip_pattern=None):
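    """Like residual_block, but the matmul takes a second data input and the
    two pooling branches can be wrapped in one or two skip-quant name scopes."""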
    def conv_bn_layer(
        input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
    ):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr,
        )
        return fluid.layers.batch_norm(input=tmp, act=act)

    data1 = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    data2 = fluid.layers.data(
        name='matmul_input', shape=[16, 32, 32], dtype='float32'
    )
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data1
    for _ in range(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = paddle.nn.functional.relu(paddle.add(x=conv, y=short))
    hidden = fluid.layers.matmul(hidden, data2, True, True)
    if isinstance(quant_skip_pattern, str):
        with fluid.name_scope(quant_skip_pattern):
            pool1 = fluid.layers.pool2d(
                input=hidden, pool_size=2, pool_type='avg', pool_stride=2
            )
            pool2 = fluid.layers.pool2d(
                input=hidden, pool_size=2, pool_type='max', pool_stride=2
            )
            pool_add = paddle.nn.functional.relu(paddle.add(x=pool1, y=pool2))
    elif isinstance(quant_skip_pattern, list):
        assert (
            len(quant_skip_pattern) > 1
        ), 'test config error: the len of quant_skip_pattern list should be greater than 1.'
        with fluid.name_scope(quant_skip_pattern[0]):
            pool1 = fluid.layers.pool2d(
                input=hidden, pool_size=2, pool_type='avg', pool_stride=2
            )
            pool2 = fluid.layers.pool2d(
                input=hidden, pool_size=2, pool_type='max', pool_stride=2
            )
        with fluid.name_scope(quant_skip_pattern[1]):
            pool_add = paddle.nn.functional.relu(paddle.add(x=pool1, y=pool2))
    else:
        pool1 = fluid.layers.pool2d(
            input=hidden, pool_size=2, pool_type='avg', pool_stride=2
        )
        pool2 = fluid.layers.pool2d(
            input=hidden, pool_size=2, pool_type='max', pool_stride=2
        )
        pool_add = paddle.nn.functional.relu(paddle.add(x=pool1, y=pool2))
    fc = fluid.layers.fc(input=pool_add, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = paddle.mean(loss)
    return loss


class TestAddQuantDequantPass(unittest.TestCase):
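    """Check that AddQuantDequantPass inserts quant-dequant ops for the inputs
    of elementwise_add and pool2d unless they match the skip pattern."""
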
    def setUp(self):
        self._target_ops = {'elementwise_add', 'pool2d'}
        self._target_grad_ops = {'elementwise_add_grad', 'pool2d_grad'}

    def check_graph(self, graph, skip_pattern=None):
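        """For every target op that is not skipped by its name scope and whose
        inputs are all non-persistable, assert that each input variable name
        ends with `.quant_dequant`."""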
        ops = graph.all_op_nodes()
        for op_node in ops:
            if op_node.name() in self._target_ops:
                user_skipped = False
                if isinstance(skip_pattern, list):
                    user_skipped = op_node.op().has_attr(
                        "op_namescope"
                    ) and any(
                        pattern in op_node.op().attr("op_namescope")
                        for pattern in skip_pattern
                    )
                elif isinstance(skip_pattern, str):
                    user_skipped = (
                        op_node.op().has_attr("op_namescope")
                        and op_node.op().attr("op_namescope").find(skip_pattern)
                        != -1
                    )

                if user_skipped:
                    continue

                in_nodes_all_not_persistable = True
                for input_name in op_node.input_arg_names():
                    in_node = graph._find_node_by_name(
                        op_node.inputs, input_name
                    )
                    in_nodes_all_not_persistable = (
                        in_nodes_all_not_persistable
                        and not in_node.persistable()
                    )
                if not in_nodes_all_not_persistable:
                    continue
                input_names = op_node.input_arg_names()
                for input_name in input_names:
                    self.assertTrue(input_name.endswith('.quant_dequant'))

    def residual_block_quant(
        self, quantizable_op_type, skip_pattern=None, for_ci=True
    ):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = quant_dequant_residual_block(2, skip_pattern)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        add_quant_dequant_pass = AddQuantDequantPass(
            scope=fluid.global_scope(),
            place=place,
            skip_pattern=skip_pattern,
            quantizable_op_type=quantizable_op_type,
        )
        add_quant_dequant_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quant') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'add_quant_dequant_graph', marked_nodes)
        self.check_graph(graph, skip_pattern)
        program = graph.to_program()
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quant') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_add_quant_dequant_graph', val_marked_nodes)

    def test_residual_block(self):
        quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul']
        self.residual_block_quant(
            quantizable_op_type, skip_pattern=None, for_ci=True
        )

    def test_residual_block_skip_pattern(self):
        quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul']
        self.residual_block_quant(
            quantizable_op_type, skip_pattern='skip_quant', for_ci=True
        )

    def test_residual_block_skip_pattern_1(self):
        quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul']
        self.residual_block_quant(
            quantizable_op_type,
            skip_pattern=['skip_quant1', 'skip_quant2'],
            for_ci=True,
        )


class TestQuantizationTransformPassV2(unittest.TestCase):
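    """Repeat the transform-pass checks, exercising QuantizationTransformPassV2
    on the linear FC network."""
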
    def setUp(self):
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y'],
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y'],
        }

    def check_program(self, program):
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized')
                        )
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
892 893
                            arg_name.endswith('.quantized.dequantized')
                        )
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(
        self, activation_quant_type, weight_quantize_type, for_ci=True
    ):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPassV2(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
        )
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw(
                '.', 'quantize_fc_' + activation_quant_type, marked_nodes
            )
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw(
                '.', 'val_fc_' + activation_quant_type, val_marked_nodes
            )

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', 'abs_max', for_ci=True)

    def test_linear_fc_quant_channel_wise_abs_max(self):
        self.linear_fc_quant('abs_max', 'channel_wise_abs_max', for_ci=True)

    def residual_block_quant(
        self,
        activation_quant_type,
        weight_quantize_type,
        quantizable_op_type,
        for_ci=True,
    ):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=quantizable_op_type,
        )
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw(
                '.', 'quantize_residual_' + activation_quant_type, marked_nodes
            )
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw(
                '.', 'val_residual_' + activation_quant_type, val_marked_nodes
            )

    def test_residual_block_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant(
            'abs_max', 'abs_max', quantizable_op_type, for_ci=True
        )

    def test_residual_block_channel_wise_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant(
            'abs_max', 'channel_wise_abs_max', quantizable_op_type, for_ci=True
        )


if __name__ == '__main__':
    unittest.main()