test_imperative_ptq.py 13.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import numpy as np
import random
import shutil
import time
import unittest
23
import copy
24
import logging
25
import tempfile
26

X
XGZhang 已提交
27
import paddle.nn as nn
28 29 30 31 32
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.slim.quantization import *
from paddle.fluid.log_helper import get_logger
from paddle.dataset.common import download
J
Jiabin Yang 已提交
33
from paddle.fluid.framework import _test_eager_guard
34

X
XGZhang 已提交
35 36
from imperative_test_utils import fix_model_dict, ImperativeLenet, ImperativeLinearBn
from imperative_test_utils import ImperativeLinearBn_hook
37 38 39 40 41

# Module-level logger; the evaluation loops in this file use it to report
# intermediate accuracy.
_logger = get_logger(
    __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')


X
XGZhang 已提交
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
class TestFuseLinearBn(unittest.TestCase):
    """
    Fuse the linear and bn layers, and then quantize the model.
    """

    def test_fuse(self):
        """Quantize two Linear+BN models with fusion enabled.

        Checks that none of the sublayers named in the fuse list is still a
        batch-norm layer in the quantized model, then prints the cosine
        similarity between the outputs of each float model and its
        quantized counterpart.
        """
        model = ImperativeLinearBn()
        model_h = ImperativeLinearBn_hook()
        inputs = paddle.randn((3, 10), dtype="float32")
        config = PTQConfig(AbsmaxQuantizer(), AbsmaxQuantizer())
        ptq = ImperativePTQ(config)
        f_l = [['linear', 'bn']]
        quant_model = ptq.quantize(model, fuse=True, fuse_list=f_l)
        quant_h = ptq.quantize(model_h, fuse=True, fuse_list=f_l)

        # f_l is a list of name groups, so a sublayer name can never be a
        # direct member of it; compare against the flattened set of names.
        fused_names = {name for group in f_l for name in group}
        for name, layer in quant_model.named_sublayers():
            if name in fused_names:
                self.assertNotIsInstance(layer,
                                         (nn.BatchNorm1D, nn.BatchNorm2D))

        out = model(inputs)
        out_h = model_h(inputs)
        out_quant = quant_model(inputs)
        out_quant_h = quant_h(inputs)
        cos_sim_func = nn.CosineSimilarity(axis=0)
        print('fuse linear+bn',
              cos_sim_func(out.flatten(), out_quant.flatten()))
        print(cos_sim_func(out_h.flatten(), out_quant_h.flatten()))


70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
class TestImperativePTQ(unittest.TestCase):
    """
    Post-training quantization (PTQ) test for a pretrained LeNet in
    dygraph mode.

    The model is quantized with ImperativePTQ, run on MNIST test batches,
    saved as an inference model, and the accuracy of the dygraph model is
    compared with the accuracy of the saved program. Subclasses change the
    quantizer setup by overriding set_vars.
    """

    @classmethod
    def setUpClass(cls):
        """Record the download locations and seed the random generators."""
        cls.download_path = 'dygraph_int8/download'
        cls.cache_folder = os.path.expanduser('~/.cache/paddle/dataset/' +
                                              cls.download_path)

        cls.lenet_url = "https://paddle-inference-dist.cdn.bcebos.com/int8/unittest_model_data/lenet_pretrained.tar.gz"
        cls.lenet_md5 = "953b802fb73b52fae42896e3c24f0afb"

        seed = 1
        np.random.seed(seed)
        paddle.static.default_main_program().random_seed = seed
        paddle.static.default_startup_program().random_seed = seed

    def cache_unzipping(self, target_folder, zip_path):
        """Extract the tar archive `zip_path` into `target_folder`.

        Nothing is done when `target_folder` already exists. If extraction
        fails, the partially filled folder is removed before the error is
        re-raised, so that a later run does not mistake it for a valid
        cache.
        """
        if os.path.exists(target_folder):
            return

        import tarfile

        os.makedirs(target_folder)
        try:
            # Default mode detects the compression of the archive itself.
            with tarfile.open(zip_path) as archive:
                archive.extractall(target_folder)
        except Exception:
            shutil.rmtree(target_folder, ignore_errors=True)
            raise

    def download_model(self, data_url, data_md5, folder_name):
        """Download an archive and unpack it under the cache folder.

        Returns the path of the folder `folder_name` inside
        `self.cache_folder` that the archive is extracted into.
        """
        download(data_url, self.download_path, data_md5)
        file_name = data_url.split('/')[-1]
        zip_path = os.path.join(self.cache_folder, file_name)
        print('Data is downloaded at {0}'.format(zip_path))

        data_cache_folder = os.path.join(self.cache_folder, folder_name)
        self.cache_unzipping(data_cache_folder, zip_path)
        return data_cache_folder

    def set_vars(self):
        """Set the PTQ object, batch settings and expected values."""
        config = PTQConfig(AbsmaxQuantizer(), AbsmaxQuantizer())
        self.ptq = ImperativePTQ(config)

        self.batch_num = 10
        self.batch_size = 10
        self.eval_acc_top1 = 0.95

        # the input, output and weight thresholds of quantized op
        self.gt_thresholds = {
            'conv2d_0': [[1.0], [0.37673383951187134], [0.10933732241392136]],
            'batch_norm2d_0': [[0.37673383951187134], [0.44249194860458374]],
            're_lu_0': [[0.44249194860458374], [0.25804123282432556]],
            'max_pool2d_0': [[0.25804123282432556], [0.25804123282432556]],
            'linear_0': [[1.7058950662612915], [14.405526161193848],
                         [0.4373355209827423]],
            'add_0': [[1.7058950662612915, 0.0], [1.7058950662612915]],
        }

    def model_test(self, model, batch_num=-1, batch_size=8):
        """Return the mean top-1 accuracy of `model` on MNIST test batches.

        The model is switched to eval mode first. When `batch_num` is
        positive, evaluation stops after that many batches; otherwise the
        whole reader is consumed.
        """
        model.eval()

        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)

        eval_acc_top1_list = []
        for batch_id, data in enumerate(test_reader()):
            x_data = np.array([x[0].reshape(1, 28, 28)
                               for x in data]).astype('float32')
            y_data = np.array([x[1]
                               for x in data]).astype('int64').reshape(-1, 1)

            img = paddle.to_tensor(x_data)
            label = paddle.to_tensor(y_data)

            out = model(img)
            acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
            eval_acc_top1_list.append(float(acc_top1.numpy()))

            if batch_id % 50 == 0:
                _logger.info("Test | At step {}: acc1 = {:}, acc5 = {:}".format(
                    batch_id, acc_top1.numpy(), acc_top5.numpy()))

            if batch_num > 0 and batch_id + 1 >= batch_num:
                break

        eval_acc_top1 = sum(eval_acc_top1_list) / len(eval_acc_top1_list)

        return eval_acc_top1

    def program_test(self, program_path, batch_num=-1, batch_size=8):
        """Return the top-1 accuracy of a saved inference program.

        The program at `program_path` is loaded and run on CPU over MNIST
        test batches. When `batch_num` is positive, evaluation stops after
        that many batches.
        """
        exe = paddle.static.Executor(paddle.CPUPlace())
        [inference_program, feed_target_names, fetch_targets
         ] = (paddle.static.load_inference_model(program_path, exe))

        test_reader = paddle.batch(
            paddle.dataset.mnist.test(), batch_size=batch_size)

        top1_correct_num = 0.
        total_num = 0.
        for batch_id, data in enumerate(test_reader()):
            img = np.array([x[0].reshape(1, 28, 28)
                            for x in data]).astype('float32')
            label = np.array([x[1] for x in data]).astype('int64')

            feed = {feed_target_names[0]: img}
            results = exe.run(inference_program,
                              feed=feed,
                              fetch_list=fetch_targets)

            pred = np.argmax(results[0], axis=1)
            top1_correct_num += np.sum(np.equal(pred, label))
            total_num += len(img)

            # Log on the same batch cadence as model_test. The previous
            # sample-count condition (total_num % 50 == 49) could not be
            # met with the batch sizes used by these tests.
            if batch_id % 50 == 0:
                _logger.info("Test | Test num {}: acc1 = {:}".format(
                    total_num, top1_correct_num / total_num))

            if batch_num > 0 and batch_id + 1 >= batch_num:
                break
        return top1_correct_num / total_num

    def func_ptq(self):
        """Quantize the pretrained LeNet, save it and check accuracy.

        Asserts that the dygraph accuracy measured after saving reaches
        `self.eval_acc_top1`, and that the saved inference program is at
        least as accurate as the dygraph model.
        """
        start_time = time.time()

        self.set_vars()

        # Load model
        params_path = self.download_model(self.lenet_url, self.lenet_md5,
                                          "lenet")
        params_path = os.path.join(params_path, "lenet_pretrained",
                                   "lenet.pdparams")

        model = ImperativeLenet()
        model_state_dict = paddle.load(params_path)
        model.set_state_dict(model_state_dict)
        # Quantize, calibrate and save
        quant_model = self.ptq.quantize(model)
        # NOTE(review): this forward pass is presumably the calibration
        # step; its accuracy is only printed, never asserted.
        before_acc_top1 = self.model_test(quant_model, self.batch_num,
                                          self.batch_size)

        input_spec = [
            paddle.static.InputSpec(
                shape=[None, 1, 28, 28], dtype='float32')
        ]
        with tempfile.TemporaryDirectory(prefix="imperative_ptq_") as tmpdir:
            save_path = os.path.join(tmpdir, "model")
            self.ptq.save_quantized_model(
                model=quant_model, path=save_path, input_spec=input_spec)
            print('Quantized model saved in {%s}' % save_path)

            after_acc_top1 = self.model_test(quant_model, self.batch_num,
                                             self.batch_size)

            paddle.enable_static()
            try:
                infer_acc_top1 = self.program_test(save_path, self.batch_num,
                                                   self.batch_size)
            finally:
                # Return to dygraph mode even when the static run fails,
                # so that later tests are not left in static mode.
                paddle.disable_static()

            # Check
            print('Before converted acc_top1: %s' % before_acc_top1)
            print('After converted acc_top1: %s' % after_acc_top1)
            print('Infer acc_top1: %s' % infer_acc_top1)

            self.assertGreaterEqual(
                after_acc_top1,
                self.eval_acc_top1,
                msg="The test acc {%f} is less than {%f}." %
                (after_acc_top1, self.eval_acc_top1))
            self.assertGreaterEqual(
                infer_acc_top1,
                after_acc_top1,
                msg='The acc is lower after converting model.')

            end_time = time.time()
            print("total time: %ss \n" % (end_time - start_time))

    def test_ptq(self):
        """Run func_ptq inside _test_eager_guard, then once more outside."""
        with _test_eager_guard():
            self.func_ptq()
        self.func_ptq()

X
XGZhang 已提交
244 245

class TestImperativePTQfuse(TestImperativePTQ):
    """
    Same flow as TestImperativePTQ, but the listed LeNet sublayer pairs are
    fused while quantizing. test_ptq is inherited from the parent class and
    drives the func_ptq defined here.
    """

    def func_ptq(self):
        """Quantize LeNet with layer fusion, save it and check accuracy.

        In addition to the accuracy checks, verifies that none of the
        sublayers named in the fuse list is still a batch-norm layer in the
        quantized model.
        """
        start_time = time.time()

        self.set_vars()

        # Load model
        params_path = self.download_model(self.lenet_url, self.lenet_md5,
                                          "lenet")
        params_path = os.path.join(params_path, "lenet_pretrained",
                                   "lenet.pdparams")

        model = ImperativeLenet()
        model_state_dict = paddle.load(params_path)
        model.set_state_dict(model_state_dict)
        # Quantize, calibrate and save
        f_l = [['features.0', 'features.1'], ['features.4', 'features.5']]
        quant_model = self.ptq.quantize(model, fuse=True, fuse_list=f_l)

        # f_l is a list of name groups, so a sublayer name can never be a
        # direct member of it; compare against the flattened set of names.
        fused_names = {name for group in f_l for name in group}
        for name, layer in quant_model.named_sublayers():
            if name in fused_names:
                self.assertNotIsInstance(layer,
                                         (nn.BatchNorm1D, nn.BatchNorm2D))

        # NOTE(review): this forward pass is presumably the calibration
        # step; its accuracy is only printed, never asserted.
        before_acc_top1 = self.model_test(quant_model, self.batch_num,
                                          self.batch_size)

        input_spec = [
            paddle.static.InputSpec(
                shape=[None, 1, 28, 28], dtype='float32')
        ]
        with tempfile.TemporaryDirectory(prefix="imperative_ptq_") as tmpdir:
            save_path = os.path.join(tmpdir, "model")
            self.ptq.save_quantized_model(
                model=quant_model, path=save_path, input_spec=input_spec)
            print('Quantized model saved in {%s}' % save_path)

            after_acc_top1 = self.model_test(quant_model, self.batch_num,
                                             self.batch_size)

            paddle.enable_static()
            try:
                infer_acc_top1 = self.program_test(save_path, self.batch_num,
                                                   self.batch_size)
            finally:
                # Return to dygraph mode even when the static run fails,
                # so that later tests are not left in static mode.
                paddle.disable_static()

            # Check
            print('Before converted acc_top1: %s' % before_acc_top1)
            print('After converted acc_top1: %s' % after_acc_top1)
            print('Infer acc_top1: %s' % infer_acc_top1)

            # Check whether the quant_model is correct after converting.
            # Its accuracy must reach self.eval_acc_top1.
            self.assertGreaterEqual(
                after_acc_top1,
                self.eval_acc_top1,
                msg="The test acc {%f} is less than {%f}." %
                (after_acc_top1, self.eval_acc_top1))
            # Check the saved infer_model. The acc of infer model
            # should not be lower than the one of dygraph model.
            self.assertGreaterEqual(
                infer_acc_top1,
                after_acc_top1,
                msg='The acc is lower after converting model.')

            end_time = time.time()
            print("total time: %ss \n" % (end_time - start_time))

312 313 314 315 316 317 318 319

class TestImperativePTQHist(TestImperativePTQ):
    """
    Runs the inherited PTQ test with a PTQConfig built from HistQuantizer
    and AbsmaxQuantizer.
    """

    def set_vars(self):
        """Set the PTQ object, batch settings and expected values."""
        self.ptq = ImperativePTQ(
            PTQConfig(HistQuantizer(), AbsmaxQuantizer()))

        self.batch_num = self.batch_size = 10
        self.eval_acc_top1 = 0.98

        # Values that appear in more than one entry of the table below.
        conv_out = 0.35732391771364225
        bn_out = 0.4291427868761275
        relu_out = 0.2359918110742001
        linear_in = 1.7037603475152991

        expected = {}
        expected['conv2d_0'] = [[0.99853515625], [conv_out],
                                [0.10933732241392136]]
        expected['batch_norm2d_0'] = [[conv_out], [bn_out]]
        expected['re_lu_0'] = [[bn_out], [relu_out]]
        expected['max_pool2d_0'] = [[relu_out], [0.25665526917146053]]
        expected['linear_0'] = [[linear_in], [14.395224522473026],
                                [0.4373355209827423]]
        expected['add_0'] = [[linear_in, 0.0], [linear_in]]
        self.gt_thresholds = expected


class TestImperativePTQKL(TestImperativePTQ):
    """
    Runs the inherited PTQ test with a PTQConfig built from KLQuantizer and
    PerChannelAbsmaxQuantizer.
    """

    def set_vars(self):
        """Set the PTQ object, batch settings and expected values."""
        self.ptq = ImperativePTQ(
            PTQConfig(KLQuantizer(), PerChannelAbsmaxQuantizer()))

        self.batch_num = self.batch_size = 10
        self.eval_acc_top1 = 1.0

        # Values that appear in more than one entry of the table below.
        conv_out = 0.37695913558696836
        bn_out = 0.27462541429440535
        pooled = 0.19189296757394914
        linear_in = 1.2839322163611087

        conv2d_1_wt_thresholds = [
            0.18116560578346252,
            0.17079241573810577,
            0.1702047884464264,
            0.179476797580719,
            0.1454375684261322,
            0.22981858253479004,
        ]

        expected = {}
        expected['conv2d_0'] = [[0.99267578125], [conv_out]]
        expected['conv2d_1'] = [[pooled], [0.24514256547263358],
                                [conv2d_1_wt_thresholds]]
        expected['batch_norm2d_0'] = [[conv_out], [bn_out]]
        expected['re_lu_0'] = [[bn_out], [pooled]]
        expected['max_pool2d_0'] = [[pooled], [pooled]]
        expected['linear_0'] = [[linear_in], [8.957185942414352]]
        expected['add_0'] = [[linear_in, 0.0], [linear_in]]
        self.gt_thresholds = expected


# Run the test cases in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()