#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPassV2
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass
from paddle.fluid import core

paddle.enable_static()

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"


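# Builds a stack of `num` fully-connected layers over a fake image input and
# returns the mean cross-entropy loss; a minimal quantizable network.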
def linear_fc(num):
    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in range(num):
        hidden = fluid.layers.fc(hidden, size=128, act='relu')
    loss = fluid.layers.cross_entropy(input=hidden, label=label)
    loss = paddle.mean(loss)
    return loss


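# Builds a small residual network: conv + batch_norm blocks with shortcut adds,
# a matmul against a created parameter, and a pool2d that can optionally be
# placed under the `quant_skip_pattern` name scope.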
def residual_block(num, quant_skip_pattern=None):

    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        tmp = fluid.layers.conv2d(input=input,
                                  filter_size=filter_size,
                                  num_filters=ch_out,
                                  stride=stride,
                                  padding=padding,
                                  act=None,
                                  bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=tmp, act=act)

    data = fluid.layers.data(name='image',
                             shape=[1, 1, 32, 32],
                             dtype='float32',
                             append_batch_size=False)
    label = fluid.layers.data(name='label',
                              shape=[1, 1],
                              dtype='int64',
                              append_batch_size=False)
    hidden = data
    for _ in range(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
    matmul_weight = fluid.layers.create_parameter(shape=[1, 16, 32, 32],
                                                  dtype='float32')
    hidden = fluid.layers.matmul(hidden, matmul_weight, True, True)
    if quant_skip_pattern:
        with fluid.name_scope(quant_skip_pattern):
            pool = fluid.layers.pool2d(input=hidden,
                                       pool_size=2,
                                       pool_type='avg',
                                       pool_stride=2)
    else:
        pool = fluid.layers.pool2d(input=hidden,
                                   pool_size=2,
                                   pool_type='avg',
                                   pool_stride=2)
    fc = fluid.layers.fc(input=pool, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = paddle.mean(loss)
    return loss


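# MNIST-style conv net; the final prediction fc is placed under the
# `quant_skip_pattern` name scope, so it should be skipped by quantization.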
def conv_net(img, label, quant_skip_pattern):
    conv_pool_1 = fluid.nets.simple_img_conv_pool(input=img,
                                                  filter_size=5,
                                                  num_filters=20,
                                                  pool_size=2,
                                                  pool_stride=2,
                                                  pool_type='max',
                                                  act="relu")
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    conv_pool_2 = fluid.nets.simple_img_conv_pool(input=conv_pool_1,
                                                  filter_size=5,
                                                  num_filters=50,
                                                  pool_size=2,
                                                  pool_stride=2,
                                                  pool_type='avg',
                                                  act="relu")
    hidden = fluid.layers.fc(input=conv_pool_2, size=100, act='relu')
    with fluid.name_scope(quant_skip_pattern):
        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = paddle.mean(loss)
    return avg_loss


class TestQuantizationTransformPass(unittest.TestCase):

    def setUp(self):
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y']
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y']
        }

    def check_program(self, program):
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(self,
                        activation_quant_type,
                        weight_quantize_type,
                        for_ci=True):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_fc_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_fc_' + activation_quant_type,
                           val_marked_nodes)

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', 'abs_max', for_ci=True)

    def test_linear_fc_quant_range_abs_max(self):
        self.linear_fc_quant('range_abs_max', 'abs_max', for_ci=True)

    def test_linear_fc_quant_moving_average_abs_max(self):
        self.linear_fc_quant('moving_average_abs_max',
                             'channel_wise_abs_max',
                             for_ci=True)

    def residual_block_quant(self,
                             activation_quant_type,
                             weight_quantize_type,
                             quantizable_op_type,
                             for_ci=True):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=quantizable_op_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_residual_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_residual_' + activation_quant_type,
                           val_marked_nodes)

    def test_residual_block_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant('abs_max',
                                  'abs_max',
                                  quantizable_op_type,
                                  for_ci=True)

    def test_residual_block_range_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant('range_abs_max',
                                  'abs_max',
                                  quantizable_op_type,
                                  for_ci=True)

    def test_residual_block_moving_average_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant('moving_average_abs_max',
                                  'channel_wise_abs_max',
                                  quantizable_op_type,
                                  for_ci=True)


class TestQuantizationFreezePass(unittest.TestCase):

    def freeze_graph(self,
                     use_cuda,
                     seed,
                     activation_quant_type,
                     bias_correction=False,
                     weight_quant_type='abs_max',
                     for_ci=True,
                     quant_skip_pattern='skip_quant'):

        def build_program(main, startup, is_test):
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(name='image',
                                            shape=[1, 28, 28],
                                            dtype='float32')
                    label = fluid.layers.data(name='label',
                                              shape=[1],
                                              dtype='int64')
                    loss = conv_net(img, label, quant_skip_pattern)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
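        # Insert fake quantize/dequantize ops into the train and test graphs.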
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type,
            skip_pattern=quant_skip_pattern)
        transform_pass.apply(main_graph)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type,
            skip_pattern=quant_skip_pattern)
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        if not for_ci:
            marked_nodes = set()
            for op in main_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            main_graph.draw(
                '.', 'main' + dev_name + activation_quant_type + '_' +
                weight_quant_type, marked_nodes)
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.', 'test' + dev_name + activation_quant_type + '_' +
                weight_quant_type, marked_nodes)

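        # Compile the quantized training graph and train a few mini-batches.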
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        build_strategy.fuse_all_reduce_ops = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=500),
                                    batch_size=batch_size)
        test_reader = paddle.batch(paddle.dataset.mnist.test(),
                                   batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        with fluid.scope_guard(scope):
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                if not for_ci:
                    print('{}: {}'.format(
                        'loss' + dev_name + activation_quant_type + '_' +
                        weight_quant_type, loss_v))

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Testing
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(program=quantized_test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

        # Freeze graph for inference, but the weight of fc/conv is still float type.
        freeze_pass = QuantizationFreezePass(
            scope=scope,
            place=place,
            bias_correction=bias_correction,
            weight_quantize_type=weight_quant_type)
        freeze_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.', 'test_freeze' + dev_name + activation_quant_type + '_' +
                weight_quant_type, marked_nodes)

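        # The loss of the frozen program should match the fake-quantized one.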
        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        if not for_ci:
            print('{}: {}'.format(
                'test_loss1' + dev_name + activation_quant_type + '_' +
                weight_quant_type, test_loss1))
            print('{}: {}'.format(
                'test_loss2' + dev_name + activation_quant_type + '_' +
                weight_quant_type, test_loss2))
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # Maybe failed, this is due to the calculation precision
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
        if not for_ci:
            print('{}: {}'.format(
                'w_freeze' + dev_name + activation_quant_type + '_' +
                weight_quant_type, np.sum(w_freeze)))
            print('{}: {}'.format(
                'w_quant' + dev_name + activation_quant_type + '_' +
                weight_quant_type, np.sum(w_quant)))

        # Convert parameter to 8-bit.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.', 'test_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, marked_nodes)
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameter and model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'server_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, ['image', 'label'], [loss], exe,
                server_program_int8)
            # Test whether the 8-bit parameter and model file can be loaded successfully.
            [infer, feed, fetch] = fluid.io.load_inference_model(
                'server_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, exe)
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        if not for_ci:
            print('{}: {}'.format(
                'w_8bit' + dev_name + activation_quant_type + '_' +
                weight_quant_type, np.sum(w_8bit)))
            print('{}: {}'.format(
                'w_freeze' + dev_name + activation_quant_type + '_' +
                weight_quant_type, np.sum(w_freeze)))

        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw(
                '.', 'test_mobile' + dev_name + activation_quant_type + '_' +
                weight_quant_type, marked_nodes)

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'mobile_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, ['image', 'label'], [loss], exe,
                mobile_program)

    def test_freeze_graph_cuda_dynamic(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(True,
                                  seed=1,
                                  activation_quant_type='abs_max',
                                  weight_quant_type='abs_max',
                                  for_ci=True)
            with fluid.unique_name.guard():
                self.freeze_graph(True,
                                  seed=1,
                                  activation_quant_type='abs_max',
                                  weight_quant_type='channel_wise_abs_max',
                                  for_ci=True)

    def test_freeze_graph_cpu_dynamic(self):
        with fluid.unique_name.guard():
            self.freeze_graph(False,
                              seed=2,
                              activation_quant_type='abs_max',
                              weight_quant_type='abs_max',
                              for_ci=True)
            self.freeze_graph(False,
                              seed=2,
                              activation_quant_type='abs_max',
                              weight_quant_type='channel_wise_abs_max',
                              for_ci=True)

    def test_freeze_graph_cuda_static(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(True,
                                  seed=1,
                                  activation_quant_type='range_abs_max',
                                  bias_correction=True,
                                  weight_quant_type='abs_max',
                                  for_ci=True)
                self.freeze_graph(True,
                                  seed=1,
                                  activation_quant_type='range_abs_max',
                                  weight_quant_type='abs_max',
                                  for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True)
                self.freeze_graph(True,
                                  seed=1,
                                  activation_quant_type='range_abs_max',
                                  weight_quant_type='channel_wise_abs_max',
                                  for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    bias_correction=True,
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)

    def test_freeze_graph_cpu_static(self):
        with fluid.unique_name.guard():
            self.freeze_graph(False,
                              seed=2,
                              activation_quant_type='range_abs_max',
                              weight_quant_type='abs_max',
                              for_ci=True)
            self.freeze_graph(False,
                              seed=2,
                              activation_quant_type='moving_average_abs_max',
                              weight_quant_type='abs_max',
                              for_ci=True)
            self.freeze_graph(False,
                              seed=2,
                              activation_quant_type='range_abs_max',
                              weight_quant_type='channel_wise_abs_max',
                              for_ci=True)
            self.freeze_graph(False,
                              seed=2,
                              activation_quant_type='moving_average_abs_max',
                              weight_quant_type='channel_wise_abs_max',
                              for_ci=True)


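# Like residual_block, but the pool2d and elementwise_add ops only consume
# non-persistable inputs, so AddQuantDequantPass is expected to quantize them
# unless they fall under a skip_pattern name scope.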
def quant_dequant_residual_block(num, quant_skip_pattern=None):

    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        tmp = fluid.layers.conv2d(input=input,
                                  filter_size=filter_size,
                                  num_filters=ch_out,
                                  stride=stride,
                                  padding=padding,
                                  act=None,
                                  bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=tmp, act=act)

    data1 = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    data2 = fluid.layers.data(name='matmul_input',
                              shape=[16, 32, 32],
                              dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data1
    for _ in range(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
    hidden = fluid.layers.matmul(hidden, data2, True, True)
    if isinstance(quant_skip_pattern, str):
        with fluid.name_scope(quant_skip_pattern):
            pool1 = fluid.layers.pool2d(input=hidden,
                                        pool_size=2,
                                        pool_type='avg',
                                        pool_stride=2)
            pool2 = fluid.layers.pool2d(input=hidden,
                                        pool_size=2,
                                        pool_type='max',
                                        pool_stride=2)
            pool_add = fluid.layers.elementwise_add(x=pool1,
                                                    y=pool2,
                                                    act='relu')
    elif isinstance(quant_skip_pattern, list):
        assert len(
            quant_skip_pattern
        ) > 1, 'test config error: the len of quant_skip_pattern list should be greater than 1.'
        with fluid.name_scope(quant_skip_pattern[0]):
            pool1 = fluid.layers.pool2d(input=hidden,
                                        pool_size=2,
                                        pool_type='avg',
                                        pool_stride=2)
            pool2 = fluid.layers.pool2d(input=hidden,
                                        pool_size=2,
                                        pool_type='max',
                                        pool_stride=2)
        with fluid.name_scope(quant_skip_pattern[1]):
            pool_add = fluid.layers.elementwise_add(x=pool1,
                                                    y=pool2,
                                                    act='relu')
    else:
        pool1 = fluid.layers.pool2d(input=hidden,
                                    pool_size=2,
                                    pool_type='avg',
                                    pool_stride=2)
        pool2 = fluid.layers.pool2d(input=hidden,
                                    pool_size=2,
                                    pool_type='max',
                                    pool_stride=2)
        pool_add = fluid.layers.elementwise_add(x=pool1, y=pool2, act='relu')
    fc = fluid.layers.fc(input=pool_add, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = paddle.mean(loss)
    return loss


class TestAddQuantDequantPass(unittest.TestCase):

    def setUp(self):
        self._target_ops = {'elementwise_add', 'pool2d'}
        self._target_grad_ops = {'elementwise_add_grad', 'pool2d_grad'}

    def check_graph(self, graph, skip_pattern=None):
        ops = graph.all_op_nodes()
        for op_node in ops:
            if op_node.name() in self._target_ops:
                user_skipped = False
                if isinstance(skip_pattern, list):
                    user_skipped = op_node.op().has_attr("op_namescope") and \
                                   any(pattern in op_node.op().attr("op_namescope") for pattern in skip_pattern)
                elif isinstance(skip_pattern, str):
                    user_skipped = op_node.op().has_attr("op_namescope") and \
                                   op_node.op().attr("op_namescope").find(skip_pattern) != -1

                if user_skipped:
                    continue

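                # Only ops whose inputs are all non-persistable tensors should
                # have been rewritten to take '.quant_dequant' inputs.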
                in_nodes_all_not_persistable = True
                for input_name in op_node.input_arg_names():
                    in_node = graph._find_node_by_name(op_node.inputs,
                                                       input_name)
                    in_nodes_all_not_persistable = (in_nodes_all_not_persistable
                                                    and
                                                    not in_node.persistable())
                if not in_nodes_all_not_persistable:
                    continue
                input_names = op_node.input_arg_names()
                for input_name in input_names:
                    self.assertTrue(input_name.endswith('.quant_dequant'))

    def residual_block_quant(self,
                             quantizable_op_type,
                             skip_pattern=None,
                             for_ci=True):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = quant_dequant_residual_block(2, skip_pattern)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        add_quant_dequant_pass = AddQuantDequantPass(
            scope=fluid.global_scope(),
            place=place,
            skip_pattern=skip_pattern,
            quantizable_op_type=quantizable_op_type)
        add_quant_dequant_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quant') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'add_quant_dequant_graph', marked_nodes)
        self.check_graph(graph, skip_pattern)
        program = graph.to_program()
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quant') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_add_quant_dequant_graph', val_marked_nodes)

    def test_residual_block(self):
        quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul']
        self.residual_block_quant(quantizable_op_type,
                                  skip_pattern=None,
                                  for_ci=True)

    def test_residual_block_skip_pattern(self):
        quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul']
        self.residual_block_quant(quantizable_op_type,
                                  skip_pattern='skip_quant',
                                  for_ci=True)

    def test_residual_block_skip_pattern_1(self):
        quantizable_op_type = ['elementwise_add', 'pool2d', 'mul', 'matmul']
        self.residual_block_quant(quantizable_op_type,
                                  skip_pattern=['skip_quant1', 'skip_quant2'],
                                  for_ci=True)


class TestQuantizationTransformPassV2(unittest.TestCase):

    def setUp(self):
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y']
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y']
        }

    def check_program(self, program):
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(self,
                        activation_quant_type,
                        weight_quantize_type,
                        for_ci=True):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPassV2(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_fc_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_fc_' + activation_quant_type,
                           val_marked_nodes)

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', 'abs_max', for_ci=True)

    def test_linear_fc_quant_channel_wise_abs_max(self):
        self.linear_fc_quant('abs_max', 'channel_wise_abs_max', for_ci=True)

    def residual_block_quant(self,
                             activation_quant_type,
                             weight_quantize_type,
                             quantizable_op_type,
                             for_ci=True):
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPassV2(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quantize_type,
            quantizable_op_type=quantizable_op_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_residual_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_residual_' + activation_quant_type,
                           val_marked_nodes)

    def test_residual_block_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant('abs_max',
                                  'abs_max',
                                  quantizable_op_type,
                                  for_ci=True)

    def test_residual_block_channel_wise_abs_max(self):
        quantizable_op_type = ['conv2d', 'depthwise_conv2d', 'mul', 'matmul']
        self.residual_block_quant('abs_max',
                                  'channel_wise_abs_max',
                                  quantizable_op_type,
                                  for_ci=True)


if __name__ == '__main__':
    unittest.main()