#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
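
"""Unit tests for the IR quantization passes in fluid.contrib.slim:
QuantizationTransformPass, QuantizationFreezePass, ConvertToInt8Pass
and TransformForMobilePass.
"""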

import os
import unittest
import random
import numpy as np
import paddle.fluid as fluid
import six
import paddle
from paddle.fluid.framework import IrGraph
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
from paddle.fluid import core

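# Restrict the test to a single GPU and a single CPU device.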
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["CPU_NUM"] = "1"


def linear_fc(num):
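    """Build a `num`-layer fully-connected classifier; each fc layer lowers
    to a `mul` op, one of the quantizable op types checked below."""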
    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        hidden = fluid.layers.fc(hidden, size=128, act='relu')
    loss = fluid.layers.cross_entropy(input=hidden, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def residual_block(num):
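    """Build `num` conv-bn residual blocks plus an fc classifier, covering
    the quantizable `conv2d` and `mul` ops."""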
    def conv_bn_layer(input,
                      ch_out,
                      filter_size,
                      stride,
                      padding,
                      act='relu',
                      bias_attr=False):
        tmp = fluid.layers.conv2d(
            input=input,
            filter_size=filter_size,
            num_filters=ch_out,
            stride=stride,
            padding=padding,
            act=None,
            bias_attr=bias_attr)
        return fluid.layers.batch_norm(input=tmp, act=act)

    data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    hidden = data
    for _ in six.moves.xrange(num):
        conv = conv_bn_layer(hidden, 16, 3, 1, 1, act=None, bias_attr=True)
        short = conv_bn_layer(hidden, 16, 1, 1, 0, act=None)
        hidden = fluid.layers.elementwise_add(x=conv, y=short, act='relu')
    fc = fluid.layers.fc(input=hidden, size=10)
    loss = fluid.layers.cross_entropy(input=fc, label=label)
    loss = fluid.layers.mean(loss)
    return loss


def conv_net(img, label):
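    """A small MNIST CNN: two conv-pool blocks followed by a softmax
    classifier. Returns the average cross-entropy loss."""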
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")
    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
    loss = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_loss = fluid.layers.mean(loss)
    return avg_loss


class TestQuantizationTransformPass(unittest.TestCase):
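    """Checks that QuantizationTransformPass rewrites the inputs of every
    quantizable op (and of its grad op) to fake-quantized variables."""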
    def setUp(self):
        self.quantizable_op_and_inputs = {
            'conv2d': ['Input', 'Filter'],
            'depthwise_conv2d': ['Input', 'Filter'],
            'mul': ['X', 'Y']
        }
        self.quantizable_grad_op_inputs = {
            'conv2d_grad': ['Input', 'Filter'],
            'depthwise_conv2d_grad': ['Input', 'Filter'],
            'mul_grad': ['X', 'Y']
        }

    def check_program(self, transform_pass, program):
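        """Assert that each input of a quantizable op, forward and backward,
        has been renamed to its `.quantized.dequantized` counterpart."""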
        quantized_ops = set()
        for block in program.blocks:
            for op in block.ops:
                # check forward
                if op.type in self.quantizable_op_and_inputs:
                    for arg_name in op.input_arg_names:
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        quantized_ops.add(arg_name)

            for op in block.ops:
                # check backward
                if op.type in self.quantizable_grad_op_inputs:
                    for pname in self.quantizable_grad_op_inputs[op.type]:
                        arg_name = op.input(pname)[0]
                        self.assertTrue(
                            arg_name.endswith('.quantized.dequantized'))
                        self.assertTrue(arg_name in quantized_ops)

    def linear_fc_quant(self, quant_type, for_ci=False):
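        """Apply the transform pass to the fc network using the given
        activation quantization type and verify the rewritten program."""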
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = linear_fc(3)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_fc_' + quant_type, marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_fc_' + quant_type, val_marked_nodes)

    def test_linear_fc_quant_abs_max(self):
        self.linear_fc_quant('abs_max', for_ci=True)

    def test_linear_fc_quant_range_abs_max(self):
        self.linear_fc_quant('range_abs_max', for_ci=True)

    def test_linear_fc_quant_moving_average_abs_max(self):
        self.linear_fc_quant('moving_average_abs_max', for_ci=True)

    def residual_block_quant(self, quant_type, for_ci=False):
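        """Apply the transform pass to the residual network using the given
        activation quantization type and verify the rewritten program."""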
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            loss = residual_block(2)
            opt = fluid.optimizer.Adam(learning_rate=0.001)
            opt.minimize(loss)
        place = fluid.CPUPlace()
        exe = fluid.Executor(place)
        graph = IrGraph(core.Graph(main.desc), for_test=False)
        transform_pass = QuantizationTransformPass(
            scope=fluid.global_scope(),
            place=place,
            activation_quantize_type=quant_type)
        transform_pass.apply(graph)
        if not for_ci:
            marked_nodes = set()
            for op in graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            graph.draw('.', 'quantize_residual_' + quant_type, marked_nodes)
        program = graph.to_program()
        self.check_program(transform_pass, program)
        val_graph = IrGraph(core.Graph(program.desc), for_test=False)
        if not for_ci:
            val_marked_nodes = set()
            for op in val_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    val_marked_nodes.add(op)
            val_graph.draw('.', 'val_residual_' + quant_type, val_marked_nodes)

    def test_residual_block_abs_max(self):
        self.residual_block_quant('abs_max', for_ci=True)

    def test_residual_block_range_abs_max(self):
        self.residual_block_quant('range_abs_max', for_ci=True)

    def test_residual_block_moving_average_abs_max(self):
        self.residual_block_quant('moving_average_abs_max', for_ci=True)


class TestQuantizationFreezePass(unittest.TestCase):
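    """End-to-end check of the quantization pipeline: train with fake
    quantization ops, freeze the graph, convert weights to int8, and
    transform the graph for mobile deployment."""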
    def freeze_graph(self, use_cuda, seed, quant_type, for_ci=False):
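        """Train a quantized conv net for a few iterations, then exercise
        QuantizationFreezePass, ConvertToInt8Pass and TransformForMobilePass
        in turn, checking losses and weights at each stage."""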
        def build_program(main, startup, is_test):
            main.random_seed = seed
            startup.random_seed = seed
            with fluid.unique_name.guard():
                with fluid.program_guard(main, startup):
                    img = fluid.layers.data(
                        name='image', shape=[1, 28, 28], dtype='float32')
                    label = fluid.layers.data(
                        name='label', shape=[1], dtype='int64')
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = fluid.optimizer.Adam(learning_rate=0.001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = fluid.Program()
        startup = fluid.Program()
        test_program = fluid.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        exe = fluid.Executor(place)
        scope = fluid.Scope()
        with fluid.scope_guard(scope):
            exe.run(startup)
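        # Insert fake quantize/dequantize ops into both the train and the
        # test graphs.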
        transform_pass = QuantizationTransformPass(
            scope=scope, place=place, activation_quantize_type=quant_type)
        transform_pass.apply(main_graph)
        transform_pass.apply(test_graph)
        dev_name = '_gpu_' if use_cuda else '_cpu_'
        if not for_ci:
            marked_nodes = set()
            for op in main_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            main_graph.draw('.', 'main' + dev_name + quant_type, marked_nodes)
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test' + dev_name + quant_type, marked_nodes)

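        # Compile the transformed training graph. Memory optimization and
        # in-place reuse are disabled, presumably so they do not disturb the
        # freshly inserted quantization ops.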
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel(
            loss_name=loss.name, build_strategy=build_strategy)
        quantized_test_program = test_graph.to_program()
        iters = 5
        batch_size = 8

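        # Standard MNIST readers for training and testing.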
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)
        feeder = fluid.DataFeeder(feed_list=feeds, place=place)
        with fluid.scope_guard(scope):
            for _ in range(iters):
                data = next(train_reader())
                loss_v = exe.run(binary,
                                 feed=feeder.feed(data),
                                 fetch_list=[loss])
                if not for_ci:
                    print('{}: {}'.format('loss' + dev_name + quant_type,
                                          loss_v))

        test_data = next(test_reader())
        with fluid.program_guard(quantized_test_program):
            w_var = fluid.framework._get_var('conv2d_1.w_0.quantized',
                                             quantized_test_program)
        # Run the fake-quantized test program to get a baseline loss and the
        # quantized conv weight.
        with fluid.scope_guard(scope):
            test_loss1, w_quant = exe.run(program=quantized_test_program,
                                          feed=feeder.feed(test_data),
                                          fetch_list=[loss, w_var])

        # Freeze the graph for inference; the weights of fc/conv are still
        # float at this point.
        freeze_pass = QuantizationFreezePass(scope=scope, place=place)
        freeze_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_freeze' + dev_name + quant_type,
                            marked_nodes)

        server_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            test_loss2, = exe.run(program=server_program,
                                  feed=feeder.feed(test_data),
                                  fetch_list=[loss])
        self.assertAlmostEqual(test_loss1, test_loss2, delta=5e-3)
        if not for_ci:
            print('{}: {}'.format('test_loss1' + dev_name + quant_type,
                                  test_loss1))
            print('{}: {}'.format('test_loss2' + dev_name + quant_type,
                                  test_loss2))
        w_freeze = np.array(scope.find_var('conv2d_1.w_0').get_tensor())
        # The following check may fail because of limited calculation
        # precision, so it is kept disabled:
        # self.assertAlmostEqual(np.sum(w_freeze), np.sum(w_quant))
        if not for_ci:
            print('{}: {}'.format('w_freeze' + dev_name + quant_type,
                                  np.sum(w_freeze)))
            print('{}: {}'.format('w_quant' + dev_name + quant_type,
                                  np.sum(w_quant)))

        # Convert the parameters to 8-bit integers.
        convert_int8_pass = ConvertToInt8Pass(scope=scope, place=place)
        convert_int8_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_int8' + dev_name + quant_type,
                            marked_nodes)
        server_program_int8 = test_graph.to_program()
        # Save the 8-bit parameters and model file.
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('server_int8' + dev_name + quant_type,
                                          ['image', 'label'], [loss], exe,
                                          server_program_int8)
            # Check that the 8-bit parameters and model file load back
            # successfully.
            [infer, feed, fetch] = fluid.io.load_inference_model(
                'server_int8' + dev_name + quant_type, exe)
        # Check the loaded 8-bit weight.
        w_8bit = np.array(scope.find_var('conv2d_1.w_0.int8').get_tensor())
        self.assertEqual(w_8bit.dtype, np.int8)
        self.assertEqual(np.sum(w_8bit), np.sum(w_freeze))
        if not for_ci:
            print('{}: {}'.format('w_8bit' + dev_name + quant_type,
                                  np.sum(w_8bit)))
            print('{}: {}'.format('w_freeze' + dev_name + quant_type,
                                  np.sum(w_freeze)))

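        # Rewrite the frozen graph into the quantize/dequantize op variants
        # that paddle-mobile expects.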
        mobile_pass = TransformForMobilePass()
        mobile_pass.apply(test_graph)
        if not for_ci:
            marked_nodes = set()
            for op in test_graph.all_op_nodes():
                if op.name().find('quantize') > -1:
                    marked_nodes.add(op)
            test_graph.draw('.', 'test_mobile' + dev_name + quant_type,
                            marked_nodes)

        mobile_program = test_graph.to_program()
        with fluid.scope_guard(scope):
            fluid.io.save_inference_model('mobile_int8' + dev_name + quant_type,
                                          ['image', 'label'], [loss], exe,
                                          mobile_program)

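    # 'abs_max' computes activation scales dynamically at runtime; the
    # 'range_abs_max' and 'moving_average_abs_max' types keep static scale
    # variables, hence the dynamic/static split in the test names below.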
    def test_freeze_graph_cuda_dynamic(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True, seed=1, quant_type='abs_max', for_ci=True)

    def test_freeze_graph_cpu_dynamic(self):
        with fluid.unique_name.guard():
            self.freeze_graph(False, seed=2, quant_type='abs_max', for_ci=True)

    def test_freeze_graph_cuda_static(self):
        if fluid.core.is_compiled_with_cuda():
            with fluid.unique_name.guard():
                self.freeze_graph(
                    True, seed=1, quant_type='range_abs_max', for_ci=True)
                self.freeze_graph(
                    True,
                    seed=1,
                    quant_type='moving_average_abs_max',
                    for_ci=True)

    def test_freeze_graph_cpu_static(self):
        with fluid.unique_name.guard():
            self.freeze_graph(
                False, seed=2, quant_type='range_abs_max', for_ci=True)
            self.freeze_graph(
                False, seed=2, quant_type='moving_average_abs_max', for_ci=True)


if __name__ == '__main__':
    unittest.main()