#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import six
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid import core

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"
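
# The tests below exercise the slim quantization passes end to end:
# QuantizationTransformPass rewrites each quantizable op (conv2d,
# depthwise_conv2d, mul) to read '.quantized.dequantized' inputs, i.e. it
# inserts fake quantize/dequantize ops for training; QuantizationFreezePass
# folds those fake ops into an inference graph; ConvertToInt8Pass stores the
# frozen weights as int8; and TransformForMobilePass adapts the frozen graph
# for mobile deployment.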


def linear_fc(num):
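    """Stack `num` fully-connected layers on a 32x32 single-channel image
    input and return the mean cross-entropy loss against an int64 label."""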
    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        hidden = fluid.layers.fc(hidden, size=128, act='relu')
    loss = fluid.layers.cross_entropy(input=hidden, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def residual_block(num):
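    """Build `num` conv + batch-norm residual units followed by a 10-way FC
    classifier and return the mean cross-entropy loss."""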
    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=tmp, act=act)

    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
    fc = fluid.layers.fc(input=hidden, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def conv_net(img, label):
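    """A small LeNet-style MNIST network: two conv-pool blocks (with batch
    norm after the first) and a softmax classifier; returns the average
    cross-entropy loss."""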
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")
    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = fluid.layers.mean(loss)
    return avg_loss


class TestQuantizationTransformPass(unittest.TestCase):
    def setUp(self):
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y']
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y']
        }

    def check_program(self, transform_pass, program):
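        """Assert that every input of a quantizable forward op was rewritten
        to a '.quantized.dequantized' variable, and that the corresponding
        grad ops read those same rewritten variables."""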
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(self, activation_quant_type, for_ci=False):
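        """Apply QuantizationTransformPass to the linear FC network and check
        the transformed program; graph visualizations are only drawn when
        `for_ci` is False."""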
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_fc_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_fc_' + activation_quant_type,
                           val_marked_nodes)

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', for_ci=True)

    def test_linear_fc_quant_range_abs_max(self):
        self.linear_fc_quant('range_abs_max', for_ci=True)

    def test_linear_fc_quant_moving_average_abs_max(self):
        self.linear_fc_quant('moving_average_abs_max', for_ci=True)

    def residual_block_quant(self, activation_quant_type, for_ci=False):
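        """Same as `linear_fc_quant`, but on the residual-block network."""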
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_residual_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_residual_' + activation_quant_type,
                           val_marked_nodes)

    def test_residual_block_abs_max(self):
        self.residual_block_quant('abs_max', for_ci=True)

    def test_residual_block_range_abs_max(self):
        self.residual_block_quant('range_abs_max', for_ci=True)

    def test_residual_block_moving_average_abs_max(self):
        self.residual_block_quant('moving_average_abs_max', for_ci=True)


class TestQuantizationFreezePass(unittest.TestCase):
    def freeze_graph(self,
                     use_cuda,
                     seed,
                     activation_quant_type,
                     weight_quant_type='abs_max',
                     for_ci=False):
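        """End-to-end check of the quantization pipeline: insert fake
        quantize/dequantize ops into a conv net, train it for a few
        mini-batches, freeze the test graph for inference, convert the
        weights to int8, and round-trip the saved inference models."""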
        def build_program(main, startup, is_test):
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type)
        transform_pass.apply(main_graph)
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        if not for_ci:
            marked_nodes = set()
            for op in main_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            main_graph.draw('.', 'main' + dev_name + activation_quant_type + '_'
                            + weight_quant_type, marked_nodes)
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test' + dev_name + activation_quant_type + '_'
                            + weight_quant_type, marked_nodes)

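        # Compile the transformed training graph; memory optimization and
        # inplace reuse are turned off, presumably so that variables consumed
        # by the inserted quantize ops are not reused or elided.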
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
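        # Train the fake-quantized graph on a few MNIST mini-batches.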
        with fluid.scope_guard(scope):
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                if not for_ci:
                    print('{}: {}'.format('loss' + dev_name +
                                          activation_quant_type + '_' +
                                          weight_quant_type, loss_v))

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Evaluate the fake-quantized test program and fetch one quantized weight.
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(program=quantized_test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

        # Freeze the graph for inference; the fc/conv weights are still stored as floats at this point.
        freeze_pass = QuantizationFreezePass(
            scope=scope, place=place, weight_quantize_type=weight_quant_type)
        freeze_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_freeze' + dev_name +
                            activation_quant_type + '_' + weight_quant_type,
                            marked_nodes)

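        # The frozen inference graph should produce nearly the same loss as
        # the fake-quantized test program evaluated above.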
        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        if not for_ci:
            print(
                '{}: {}'.format('test_loss1' + dev_name + activation_quant_type
                                + '_' + weight_quant_type, test_loss1))
            print(
                '{}: {}'.format('test_loss2' + dev_name + activation_quant_type
                                + '_' + weight_quant_type, test_loss2))
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # This check may fail due to floating-point precision, so it is
        # left disabled:
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
        if not for_ci:
            print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type
                                  + '_' + weight_quant_type, np.sum(w_freeze)))
            print('{}: {}'.format('w_quant' + dev_name + activation_quant_type +
                                  '_' + weight_quant_type, np.sum(w_quant)))

        # Convert the parameters to 8-bit integers.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_int8' + dev_name + activation_quant_type
                            + '_' + weight_quant_type, marked_nodes)
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameter and model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'server_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, ['image', 'label'], [loss], exe,
                server_program_int8)
            # Test whether the 8-bit parameter and model file can be loaded successfully.
            [infer, feed, fetch] = fluid.io.load_inference_model(
                'server_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, exe)
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        if not for_ci:
            print('{}: {}'.format('w_8bit' + dev_name + activation_quant_type +
                                  '_' + weight_quant_type, np.sum(w_8bit)))
            print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type
                                  + '_' + weight_quant_type, np.sum(w_freeze)))

        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_mobile' + dev_name +
                            activation_quant_type + '_' + weight_quant_type,
                            marked_nodes)

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'mobile_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, ['image', 'label'], [loss], exe,
                mobile_program)

    def test_freeze_graph_cuda_dynamic(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True)
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)

    def test_freeze_graph_cpu_dynamic(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='abs_max',
                weight_quant_type='abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True)

    def test_freeze_graph_cuda_static(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='range_abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='range_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)

    def test_freeze_graph_cpu_static(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='range_abs_max',
                weight_quant_type='abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='range_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True)


if __name__ == '__main__':
    unittest.main()