test_quantization_scale_pass.py 7.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import random
17
import tempfile
18 19 20 21
import unittest

import numpy as np

22 23
import paddle
from paddle.fluid.framework import IrGraph
24 25 26 27 28 29 30 31
from paddle.framework import core
from paddle.static.quantization import (
    AddQuantDequantPass,
    OutScaleForInferencePass,
    OutScaleForTrainingPass,
    QuantizationFreezePass,
    QuantizationTransformPass,
)
32

P
pangyoki 已提交
33 34
# The programs in this file are built with the static-graph APIs, so static
# mode is enabled once at import time.
paddle.enable_static()

# Set at import time, before any executor is created: expose only CUDA
# device 0 and set CPU_NUM to 1.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0", "CPU_NUM": "1"})


39
def conv_net(img, label):
    """Build a small two-stage convolutional classifier and return its loss.

    The network is: conv(5x5, 20 filters, relu) -> max-pool(2, stride 2) ->
    batch-norm -> conv(5x5, 20 filters, relu) -> avg-pool(2, stride 2) ->
    fc(100, relu) -> fc(10, softmax), followed by a cross-entropy loss.

    Args:
        img: Input variable fed to the first convolution.
        label: Label variable passed to the cross-entropy loss.

    Returns:
        The mean of the per-sample cross-entropy loss.
    """
    static_nn = paddle.static.nn
    functional = paddle.nn.functional
    # Both convolutions share the same hyper-parameters.
    conv_kwargs = {'filter_size': 5, 'num_filters': 20, 'act': 'relu'}

    # Stage 1: conv -> 2x2 max pool -> batch norm.
    stage_one = static_nn.conv2d(input=img, **conv_kwargs)
    stage_one = functional.max_pool2d(stage_one, kernel_size=2, stride=2)
    stage_one = static_nn.batch_norm(stage_one)

    # Stage 2: conv -> 2x2 average pool.
    stage_two = static_nn.conv2d(input=stage_one, **conv_kwargs)
    stage_two = functional.avg_pool2d(stage_two, kernel_size=2, stride=2)

    # Classifier head.
    features = static_nn.fc(stage_two, size=100, activation='relu')
    probabilities = static_nn.fc(features, size=10, activation='softmax')

    # use_softmax=False: the last fc layer already applies softmax.
    per_sample_loss = functional.cross_entropy(
        input=probabilities,
        label=label,
        reduction='none',
        use_softmax=False,
    )
    return paddle.mean(per_sample_loss)
66 67 68


class TestQuantizationScalePass(unittest.TestCase):
    """Smoke test for the out-scale quantization passes on a small conv net.

    The test makes no numeric assertions: it passes when the whole pipeline
    (graph passes, a few training steps, freezing, and saving the inference
    model) runs without raising.
    """

    @staticmethod
    def _draw_quantize_ops(graph, graph_name):
        """Draw ``graph`` into the current directory as ``graph_name``.

        Every op node whose name contains ``'quantize'`` is passed to
        ``graph.draw`` as a marked node.
        """
        marked_nodes = {
            op for op in graph.all_op_nodes() if 'quantize' in op.name()
        }
        graph.draw('.', graph_name, marked_nodes)

    def quantization_scale(
        self,
        use_cuda,
        seed,
        activation_quant_type,
        weight_quant_type='abs_max',
        for_ci=False,
    ):
        """Run the quantization + out-scale pipeline end to end.

        Steps, in order:
          1. Build a training program and a test program of ``conv_net``.
          2. Apply ``QuantizationTransformPass`` and ``AddQuantDequantPass``
             to both graphs, and ``OutScaleForTrainingPass`` to the training
             graph.
          3. Run a few training iterations on MNIST batches.
          4. Apply ``OutScaleForInferencePass`` and ``QuantizationFreezePass``
             to the test graph and convert it back to a program.
          5. Write a text dump of that program and save it as an inference
             model inside a temporary directory.

        Args:
            use_cuda: Run on ``CUDAPlace(0)`` when true, else ``CPUPlace``.
            seed: Random seed assigned to the main and startup programs.
            activation_quant_type: Forwarded to ``QuantizationTransformPass``
                as ``activation_quantize_type``.
            weight_quant_type: Forwarded as ``weight_quantize_type`` to the
                transform and freeze passes.
            for_ci: When false, additionally draw the graphs into the current
                directory and print the loss of every training iteration.
        """

        def build_program(main, startup, is_test):
            """Build ``conv_net`` in ``main``; add Adam unless ``is_test``."""
            main.random_seed = seed
            startup.random_seed = seed
            with paddle.utils.unique_name.guard():
                with paddle.static.program_guard(main, startup):
                    img = paddle.static.data(
                        name='image', shape=[-1, 1, 28, 28], dtype='float32'
                    )
                    label = paddle.static.data(
                        name='label', shape=[-1, 1], dtype='int64'
                    )
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = paddle.optimizer.Adam(learning_rate=0.0001)
                        opt.minimize(loss)
            return [img, label], loss

        random.seed(0)
        np.random.seed(0)

        main = paddle.static.Program()
        startup = paddle.static.Program()
        test_program = paddle.static.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        scope = paddle.static.global_scope()
        with paddle.static.scope_guard(scope):
            exe.run(startup)

        transform_pass = QuantizationTransformPass(
            scope=scope,
            place=place,
            activation_quantize_type=activation_quant_type,
            weight_quantize_type=weight_quant_type,
        )
        transform_pass.apply(main_graph)
        transform_pass.apply(test_graph)

        add_quant_dequant_pass = AddQuantDequantPass(scope=scope, place=place)
        add_quant_dequant_pass.apply(main_graph)
        add_quant_dequant_pass.apply(test_graph)

        # The training-time scale pass is applied to the training graph only.
        scale_training_pass = OutScaleForTrainingPass(scope=scope, place=place)
        scale_training_pass.apply(main_graph)

        dev_name = '_gpu' if use_cuda else '_cpu'
        if not for_ci:
            self._draw_quantize_ops(main_graph, 'main_scale' + dev_name)
            self._draw_quantize_ops(test_graph, 'test_scale' + dev_name)

        build_strategy = paddle.static.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        build_strategy.fuse_all_reduce_ops = False
        binary = paddle.static.CompiledProgram(
            main_graph.graph, build_strategy=build_strategy
        )
        iters = 5
        batch_size = 8

        train_reader = paddle.batch(
            paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size,
        )
        feeder = paddle.fluid.DataFeeder(feed_list=feeds, place=place)
        with paddle.static.scope_guard(scope):
            for _ in range(iters):
                # NOTE: train_reader() is called anew on every iteration, so
                # each step consumes the first batch of a fresh reader. This
                # is kept as-is so the data fed to the network is unchanged.
                data = next(train_reader())
                loss_v = exe.run(
                    binary, feed=feeder.feed(data), fetch_list=[loss]
                )
                if not for_ci:
                    print(f'loss{dev_name}: {loss_v}')

        scale_inference_pass = OutScaleForInferencePass(scope=scope)
        scale_inference_pass.apply(test_graph)

        # Freeze graph for inference, but the weight of fc/conv is still float type.
        freeze_pass = QuantizationFreezePass(
            scope=scope, place=place, weight_quantize_type=weight_quant_type
        )
        freeze_pass.apply(test_graph)
        server_program = test_graph.to_program()

        if not for_ci:
            self._draw_quantize_ops(test_graph, 'quant_scale' + dev_name)

        # The context manager removes the directory even if writing the dump
        # or saving the model raises.
        with tempfile.TemporaryDirectory() as tempdir:
            mapping_table_path = os.path.join(
                tempdir, 'quant_scale_model' + dev_name + '.txt'
            )
            save_path = os.path.join(tempdir, 'quant_scale_model' + dev_name)
            with open(mapping_table_path, 'w') as f:
                f.write(str(server_program))

            with paddle.static.scope_guard(scope):
                feed_list = ['image', 'label']
                feed_vars = [
                    server_program.global_block().var(name)
                    for name in feed_list
                ]
                paddle.static.save_inference_model(
                    save_path,
                    feed_vars,
                    [loss],
                    exe,
                    program=server_program,
                    clip_extra=True,
                )

    def test_quant_scale_cuda(self):
        """Run the pipeline on GPU; does nothing when built without CUDA."""
        if core.is_compiled_with_cuda():
            with paddle.utils.unique_name.guard():
                self.quantization_scale(
                    True,
                    seed=1,
                    activation_quant_type='moving_average_abs_max',
                    weight_quant_type='channel_wise_abs_max',
                    for_ci=True,
                )

    def test_quant_scale_cpu(self):
        """Run the pipeline on CPU."""
        with paddle.utils.unique_name.guard():
            self.quantization_scale(
                False,
                seed=2,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True,
            )
225 226 227 228


# Allow running this test file directly as a script.
if __name__ == '__main__':
    unittest.main()