test_user_defined_quantization.py 11.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
import json
16
import os
17
import random
18
import tempfile
19 20 21 22
import unittest

import numpy as np

23 24
import paddle
from paddle.fluid.framework import IrGraph
25 26 27 28 29 30 31 32
from paddle.framework import LayerHelper, core
from paddle.static.quantization import (
    AddQuantDequantPass,
    OutScaleForInferencePass,
    OutScaleForTrainingPass,
    QuantizationFreezePass,
    QuantizationTransformPass,
)
33

P
pangyoki 已提交
34 35
# Switch Paddle to static-graph mode; everything below builds programs through
# the paddle.static API.
paddle.enable_static()

36 37 38 39
# Expose only CUDA device 0 to this process; the GPU test cases run on
# paddle.CUDAPlace(0).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# NOTE(review): presumably limits Paddle's CPU execution to a single CPU
# place -- confirm against the executor documentation.
os.environ["CPU_NUM"] = "1"


40
def conv_net(img, label):
    """Build a two-stage convolutional classifier and return its mean loss.

    The network is: conv(5x5, 20 filters, ReLU) -> 2x2 max pool -> batch norm
    -> conv(5x5, 20 filters, ReLU) -> 2x2 average pool -> fc(100, ReLU)
    -> fc(10, softmax), followed by a cross-entropy loss computed on the
    softmax output.

    Args:
        img: Input variable fed to the first convolution.
        label: Ground-truth label variable passed to the cross-entropy loss.

    Returns:
        The mean of the per-sample cross-entropy loss.
    """
    static_nn = paddle.static.nn
    functional = paddle.nn.functional

    # Stage 1: convolution, max pooling, then batch normalisation.
    features = static_nn.conv2d(
        input=img, filter_size=5, num_filters=20, act='relu'
    )
    features = functional.max_pool2d(features, kernel_size=2, stride=2)
    features = static_nn.batch_norm(features)

    # Stage 2: convolution followed by average pooling.
    features = static_nn.conv2d(
        input=features, filter_size=5, num_filters=20, act='relu'
    )
    features = functional.avg_pool2d(features, kernel_size=2, stride=2)

    # Classifier head: hidden layer, then a 10-way softmax output.
    features = static_nn.fc(features, size=100, activation='relu')
    probabilities = static_nn.fc(features, size=10, activation='softmax')

    # The last fc layer already applied softmax, hence use_softmax=False.
    per_sample_loss = functional.cross_entropy(
        input=probabilities,
        label=label,
        reduction='none',
        use_softmax=False,
    )
    return paddle.mean(per_sample_loss)
67 68 69 70 71 72


def pact(x, name=None):
    """Clip ``x`` to the range ``[-u, u]`` with a learnable threshold ``u``.

    A single-element float32 parameter named ``x.name + '_pact'`` is created,
    initialised to 20 and regularised with L2 decay (0.0001). The clipping is
    expressed with ReLU ops: ``x - relu(x - u)`` caps values at ``u``, and
    adding ``relu(-u - y)`` to that result raises values up to ``-u``.

    Args:
        x: Input variable; its ``name`` is used to name the threshold.
        name: Optional name forwarded to ``LayerHelper``.

    Returns:
        The clipped variable.
    """
    helper = LayerHelper("pact", x=x, name=name)

    threshold_attr = paddle.ParamAttr(
        name=x.name + '_pact',
        initializer=paddle.nn.initializer.Constant(value=20),
        regularizer=paddle.regularizer.L2Decay(0.0001),
        learning_rate=1,
    )
    threshold = helper.create_parameter(
        attr=threshold_attr, shape=[1], dtype='float32'
    )

    relu = paddle.nn.functional.relu

    # Upper clip: subtract whatever exceeds +threshold.
    overshoot = relu(paddle.subtract(x, threshold))
    upper_clipped = paddle.subtract(x, overshoot)

    # Lower clip: add back whatever falls below -threshold.
    negated_threshold = -threshold
    undershoot = relu(paddle.subtract(negated_threshold, upper_clipped))
    return paddle.add(upper_clipped, undershoot)


class TestUserDefinedQuantization(unittest.TestCase):
    """Smoke tests for user-defined hooks of the static quantization passes.

    Each test case plugs ``pact`` into one of the four hook arguments of
    ``QuantizationTransformPass`` (activation/weight preprocess, activation/
    weight quantize) and runs the whole quantization pipeline on a small
    convolutional network. The tests make no numeric assertions; they pass
    when the pipeline completes without raising.
    """

    @staticmethod
    def _json_round_trip(obj):
        """Write ``obj`` to a temporary JSON file and return it read back.

        The temporary directory is managed by a context manager, so it is
        removed even if serialisation or parsing raises.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            path = os.path.join(tempdir, 'inference')
            with open(path, 'w', encoding='utf-8') as file:
                json.dump(obj, file)
            with open(path, 'r', encoding='utf-8') as file:
                return json.load(file)

    def quantization_scale(
        self,
        use_cuda,
        seed,
        activation_quant_type,
        weight_quant_type='abs_max',
        for_ci=False,
        act_preprocess_func=None,
        weight_preprocess_func=None,
        act_quantize_func=None,
        weight_quantize_func=None,
    ):
        """Run the quantization pipeline on ``conv_net`` with optional hooks.

        Steps, in order: build train/test programs, apply
        ``QuantizationTransformPass`` to both graphs, round-trip the test
        graph's ``out_node_mapping_table`` through a JSON file, apply
        ``AddQuantDequantPass`` and ``OutScaleForTrainingPass``, train for a
        few steps on MNIST, apply ``OutScaleForInferencePass``, restore the
        mapping table and finally (only when no custom quantize function is
        given) apply ``QuantizationFreezePass``.

        Args:
            use_cuda: Run on ``CUDAPlace(0)`` when true, else ``CPUPlace``.
            seed: Random seed assigned to the main and startup programs.
            activation_quant_type: Forwarded to the transform pass as
                ``activation_quantize_type``.
            weight_quant_type: Forwarded to the transform and freeze passes
                as ``weight_quantize_type``.
            for_ci: Unused; kept so existing callers keep working.
            act_preprocess_func: Optional activation preprocess hook.
            weight_preprocess_func: Optional weight preprocess hook.
            act_quantize_func: Optional activation quantize hook.
            weight_quantize_func: Optional weight quantize hook.
        """

        def build_program(main, startup, is_test):
            """Build ``conv_net`` in ``main``; add SGD unless ``is_test``."""
            main.random_seed = seed
            startup.random_seed = seed
            with paddle.utils.unique_name.guard():
                with paddle.static.program_guard(main, startup):
                    img = paddle.static.data(
                        name='image', shape=[-1, 1, 28, 28], dtype='float32'
                    )
                    img.stop_gradient = False
                    label = paddle.static.data(
                        name='label', shape=[-1, 1], dtype='int64'
                    )
                    loss = conv_net(img, label)
                    if not is_test:
                        opt = paddle.optimizer.SGD(learning_rate=0.0001)
                        opt.minimize(loss)
            return [img, label], loss

        def get_optimizer():
            """Optimizer factory handed to the transform pass."""
            return paddle.optimizer.Momentum(0.0001, 0.9)

        random.seed(0)
        np.random.seed(0)

        main = paddle.static.Program()
        startup = paddle.static.Program()
        test_program = paddle.static.Program()
        feeds, loss = build_program(main, startup, False)
        build_program(test_program, startup, True)
        test_program = test_program.clone(for_test=True)
        main_graph = IrGraph(core.Graph(main.desc), for_test=False)
        test_graph = IrGraph(core.Graph(test_program.desc), for_test=True)

        place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
        exe = paddle.static.Executor(place)
        scope = paddle.static.global_scope()
        with paddle.static.scope_guard(scope):
            exe.run(startup)

        # The train and test graphs are transformed with identical settings,
        # so the arguments are spelled out once and shared.
        transform_kwargs = {
            'scope': scope,
            'place': place,
            'activation_quantize_type': activation_quant_type,
            'weight_quantize_type': weight_quant_type,
            'act_preprocess_func': act_preprocess_func,
            'weight_preprocess_func': weight_preprocess_func,
            'act_quantize_func': act_quantize_func,
            'weight_quantize_func': weight_quantize_func,
            'optimizer_func': get_optimizer,
            'executor': exe,
        }
        train_transform_pass = QuantizationTransformPass(**transform_kwargs)
        train_transform_pass.apply(main_graph)
        test_transform_pass = QuantizationTransformPass(**transform_kwargs)
        test_transform_pass.apply(test_graph)

        # Snapshot the mapping table through a JSON file right after the
        # transform pass; the snapshot is restored onto the graph further down.
        mapping_table = self._json_round_trip(
            test_graph.out_node_mapping_table
        )

        add_quant_dequant_pass = AddQuantDequantPass(scope=scope, place=place)
        add_quant_dequant_pass.apply(main_graph)
        add_quant_dequant_pass.apply(test_graph)

        scale_training_pass = OutScaleForTrainingPass(scope=scope, place=place)
        scale_training_pass.apply(main_graph)

        build_strategy = paddle.static.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False
        build_strategy.fuse_all_reduce_ops = False
        binary = paddle.static.CompiledProgram(
            main_graph.graph
        ).with_data_parallel(loss_name=loss.name, build_strategy=build_strategy)

        iters = 5
        batch_size = 8
        train_reader = paddle.batch(
            paddle.reader.shuffle(paddle.dataset.mnist.train(), buf_size=500),
            batch_size=batch_size,
        )
        feeder = paddle.fluid.DataFeeder(feed_list=feeds, place=place)
        with paddle.static.scope_guard(scope):
            for _ in range(iters):
                # train_reader() is called anew on every step, so each step
                # takes the first batch of a freshly created reader.
                data = next(train_reader())
                exe.run(binary, feed=feeder.feed(data), fetch_list=[loss])

        out_scale_infer_pass = OutScaleForInferencePass(scope=scope)
        out_scale_infer_pass.apply(test_graph)

        freeze_pass = QuantizationFreezePass(
            scope=scope,
            place=place,
            weight_bits=8,
            activation_bits=8,
            weight_quantize_type=weight_quant_type,
        )

        test_graph.out_node_mapping_table = mapping_table
        # NOTE(review): freezing is skipped whenever a custom quantize
        # function is supplied -- presumably the freeze pass does not support
        # user-defined quantizers; confirm against the pass implementation.
        if act_quantize_func is None and weight_quantize_func is None:
            freeze_pass.apply(test_graph)

    def _run_case(self, use_cuda, seed, **user_funcs):
        """Run ``quantization_scale`` with the settings shared by all tests.

        GPU cases are reported as skipped (rather than silently passing)
        when Paddle was built without CUDA support.

        Args:
            use_cuda: Whether the case targets the GPU.
            seed: Program random seed for the case.
            **user_funcs: Exactly the hook keyword argument(s) under test,
                forwarded to ``quantization_scale``.
        """
        if use_cuda and not core.is_compiled_with_cuda():
            self.skipTest('PaddlePaddle is not compiled with CUDA')
        with paddle.utils.unique_name.guard():
            self.quantization_scale(
                use_cuda,
                seed=seed,
                activation_quant_type='moving_average_abs_max',
                weight_quant_type='channel_wise_abs_max',
                for_ci=True,
                **user_funcs,
            )

    def test_act_preprocess_cuda(self):
        self._run_case(True, seed=1, act_preprocess_func=pact)

    def test_act_preprocess_cpu(self):
        self._run_case(False, seed=2, act_preprocess_func=pact)

    def test_weight_preprocess_cuda(self):
        self._run_case(True, seed=1, weight_preprocess_func=pact)

    def test_weight_preprocess_cpu(self):
        self._run_case(False, seed=2, weight_preprocess_func=pact)

    def test_act_quantize_cuda(self):
        self._run_case(True, seed=1, act_quantize_func=pact)

    def test_act_quantize_cpu(self):
        self._run_case(False, seed=2, act_quantize_func=pact)

    def test_weight_quantize_cuda(self):
        self._run_case(True, seed=1, weight_quantize_func=pact)

    def test_weight_quantize_cpu(self):
        self._run_case(False, seed=2, weight_quantize_func=pact)
319 320 321 322


# Run the test cases when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()