#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
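
# This test drives the IrGraph quantization pipeline end to end:
# QuantizationTransformPass inserts fake quantize/dequantize ops for
# quantization-aware training, QuantizationFreezePass rewrites the trained
# graph for inference, ConvertToInt8Pass stores the weights as int8, and
# TransformForMobilePass adapts the frozen graph for mobile deployment.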

import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import six
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid import core

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"


def linear_fc(num):
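    """Build `num` fully-connected ReLU layers of width 128 over a 32x32
    single-channel image and return the mean cross-entropy loss."""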
    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        hidden = fluid.layers.fc(hidden, size=128, act='relu')
    loss = fluid.layers.cross_entropy(input=hidden, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def residual_block(num):
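    """Build `num` residual units (a 3x3 conv plus a 1x1 shortcut, each
    followed by batch norm) topped by a 10-way FC layer, and return the
    mean cross-entropy loss."""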
    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=tmp, act=act)

    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
    fc = fluid.layers.fc(input=hidden, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def conv_net(img, label):
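    """LeNet-style MNIST classifier: two conv-pool blocks (with batch norm
    in between) and a softmax FC layer; returns the mean cross-entropy loss."""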
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")
    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = fluid.layers.mean(loss)
    return avg_loss


class TestQuantizationTransformPass(unittest.TestCase):
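    """Checks that QuantizationTransformPass rewrites conv2d, depthwise_conv2d
    and mul ops (and their gradient ops) to consume quantized/dequantized
    inputs."""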
    def setUp(self):
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y']
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y']
        }

    def check_program(self, transform_pass, program):
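        """Assert that every input of a quantizable forward op was renamed
        with the '.quantized.dequantized' suffix and that each corresponding
        gradient op consumes one of those renamed variables."""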
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(self, activation_quant_type, for_ci=False):
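        """Apply QuantizationTransformPass to a 3-layer FC model and verify
        the transformed program; graphs are drawn for manual inspection
        unless running in CI."""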
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_fc_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_fc_' + activation_quant_type,
                           val_marked_nodes)

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', for_ci=True)

    def test_linear_fc_quant_range_abs_max(self):
        self.linear_fc_quant('range_abs_max', for_ci=True)

    def test_linear_fc_quant_moving_average_abs_max(self):
        self.linear_fc_quant('moving_average_abs_max', for_ci=True)

    def residual_block_quant(self, activation_quant_type, for_ci=False):
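        """Apply QuantizationTransformPass to a 2-unit residual model and
        verify the transformed program; graphs are drawn for manual
        inspection unless running in CI."""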
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=activation_quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_residual_' + activation_quant_type,
                       marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_residual_' + activation_quant_type,
                           val_marked_nodes)

    def test_residual_block_abs_max(self):
        self.residual_block_quant('abs_max', for_ci=True)

    def test_residual_block_range_abs_max(self):
        self.residual_block_quant('range_abs_max', for_ci=True)

    def test_residual_block_moving_average_abs_max(self):
        self.residual_block_quant('moving_average_abs_max', for_ci=True)


class TestQuantizationFreezePass(unittest.TestCase):
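    """End-to-end pipeline test: insert fake quant/dequant ops, train a few
    batches, freeze the graph for inference, convert the weights to int8,
    and apply the mobile transform, checking losses and weights at each
    stage."""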
    def freeze_graph(self,
                     use_cuda,
                     seed,
                     activation_quant_type,
                     weight_quant_type='abs_max',
                     for_ci=False):
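        """Run the full pipeline for one (device, activation quantization,
        weight quantization) configuration."""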
        def build_program(main, startup, is_test):
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type)
        transform_pass.apply(main_graph)
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        if not for_ci:
            marked_nodes = set()
            for op in main_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            main_graph.draw('.', 'main' + dev_name + activation_quant_type + '_'
                            + weight_quant_type, marked_nodes)
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test' + dev_name + activation_quant_type + '_'
                            + weight_quant_type, marked_nodes)

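        # Compile the fake-quantized training graph. Memory optimization and
        # inplace reuse are disabled, presumably so graph variables are not
        # renamed or reused behind the quantization passes' backs.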
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        with fluid.scope_guard(scope):
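            # Train for a few batches on the fake-quantized graph.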
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                if not for_ci:
                    print('{}: {}'.format('loss' + dev_name +
                                          activation_quant_type + '_' +
                                          weight_quant_type, loss_v))

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Run one batch through the quantized test program, fetching the
        # loss and the quantized conv weight.
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(program=quantized_test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

        # Freeze the graph for inference; at this point the fc/conv weights
        # are still stored as floats.
        freeze_pass = QuantizationFreezePass(
            scope=scope, place=place, weight_quantize_type=weight_quant_type)
        freeze_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_freeze' + dev_name +
                            activation_quant_type + '_' + weight_quant_type,
                            marked_nodes)

        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        if not for_ci:
            print(
                '{}: {}'.format('test_loss1' + dev_name + activation_quant_type
                                + '_' + weight_quant_type, test_loss1))
            print(
                '{}: {}'.format('test_loss2' + dev_name + activation_quant_type
                                + '_' + weight_quant_type, test_loss2))
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # This check can fail because of limited floating-point precision,
        # so it is left disabled:
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
        if not for_ci:
            print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type
                                  + '_' + weight_quant_type, np.sum(w_freeze)))
            print('{}: {}'.format('w_quant' + dev_name + activation_quant_type +
                                  '_' + weight_quant_type, np.sum(w_quant)))

        # Convert the weights to 8-bit integer parameters.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_int8' + dev_name + activation_quant_type
                            + '_' + weight_quant_type, marked_nodes)
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameters and model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'server_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, ['image', 'label'], [loss], exe,
                server_program_int8)
            # Check that the 8-bit parameters and model file load back
            # successfully.
            [infer, feed, fetch] = fluid.io.load_inference_model(
                'server_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, exe)
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        if not for_ci:
            print('{}: {}'.format('w_8bit' + dev_name + activation_quant_type +
                                  '_' + weight_quant_type, np.sum(w_8bit)))
            print('{}: {}'.format('w_freeze' + dev_name + activation_quant_type
                                  + '_' + weight_quant_type, np.sum(w_freeze)))

        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_mobile' + dev_name +
                            activation_quant_type + '_' + weight_quant_type,
                            marked_nodes)

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model(
                'mobile_int8' + dev_name + activation_quant_type + '_' +
                weight_quant_type, ['image', 'label'], [loss], exe,
                mobile_program)

    def test_freeze_graph_cuda_dynamic(self):
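        """'abs_max' computes activation scales at runtime, i.e. dynamic
        quantization."""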
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True)
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)

    def test_freeze_graph_cpu_dynamic(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='abs_max',
                weight_quant_type='abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True)

    def test_freeze_graph_cuda_static(self):
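        """'range_abs_max' and 'moving_average_abs_max' use activation scales
        collected during training, i.e. static quantization."""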
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='range_abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='abs_max',
                    for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='range_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True)

    def test_freeze_graph_cpu_static(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='range_abs_max',
                weight_quant_type='abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='range_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True)
            self.freeze_graph(
                False,
                seed=2,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True)


if __name__ == '__main__':
    unittest.main()