Add tutorial of QAT for classification (#1716)

100b7e13 · whs · GitHub · 99cd470c · 100b7e13 · 100b7e13
3 changed file
--- a/example/quantization/qat/classification/README.md
+++ b/example/quantization/qat/classification/README.md
+# 动态图量化训练
+本示例介绍如何对动态图模型进行量化训练，并以常用的MobileNetV1为例说明具体流程。
+## 分类模型的量化训练流程
+### 准备数据
+在当前目录下创建``data``文件夹，将``ImageNet``数据集解压在``data``文件夹下，解压后``data/ILSVRC2012``文件夹下应包含以下文件：
+- ``'train'``文件夹，训练图片
+- ``'train_list.txt'``文件
+- ``'val'``文件夹，验证图片
+- ``'val_list.txt'``文件
+### 准备需要量化的模型
+本示例直接使用[paddle vision](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/vision/models)内置的模型结构和预训练权重。通过以下命令查看支持的所有模型：
+```
+python train.py --help
+```
+### 训练命令
+- MobileNetV1
+  我们使用PACT量化训练方法，启动命令如下：
+  ```bash
+  # 单卡训练
+  python train.py --model=mobilenet_v1
+  # 多卡训练，以0到3号卡为例
+  python -m paddle.distributed.launch --gpus="0,1,2,3" train.py --model=mobilenet_v1
+  ```
+### 量化结果
+| 模型        | FP32模型准确率（Top1/Top5） | 量化方法     | 量化模型准确率（Top1/Top5） |
+| ----------- | --------------------------- | ------------ | --------------------------- |
+| MobileNetV1 | 70.99/89.65                 | PACT在线量化 | 70.63/89.65                 |
--- a/example/quantization/qat/classification/args.py
+++ b/example/quantization/qat/classification/args.py
+import argparse
+import six
+from inspect import isfunction
+from types import FunctionType
+from typing import Dict
+import paddle.vision.models as models
+# Registry of model builders found in paddle.vision.models, keyed by name.
+# An entry qualifies when it is a plain function whose code object lists a
+# ``pretrained`` variable (``co_varnames`` holds parameters and locals alike).
+SUPPORT_MODELS: Dict[str, FunctionType] = {}
+for _name, _module in models.__dict__.items():
+    if not isfunction(_module):
+        continue
+    if 'pretrained' not in _module.__code__.co_varnames:
+        continue
+    SUPPORT_MODELS[_name] = _module
+def parse_args(argv=None):
+    """Parse the command-line options and echo the resulting configuration.
+
+    Args:
+        argv: Optional list of argument strings. ``None`` (the default) lets
+            argparse read ``sys.argv[1:]``, which matches the previous
+            zero-argument behavior.
+
+    Returns:
+        The populated ``argparse.Namespace``.
+    """
+    parser = create_argparse()
+    args = parser.parse_args(argv)
+    print("-----------  Configuration Arguments -----------")
+    # Sorted by option name so the dump is stable between runs. Plain
+    # ``dict.items()`` replaces the Python-2 compatibility shim from ``six``.
+    for arg, value in sorted(vars(args).items()):
+        print(f"{arg}: {value}")
+    print("------------------------------------------------")
+    return args
+def _str2bool(value):
+    """Convert a command-line token to ``bool``.
+
+    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
+    because every non-empty string is truthy. This converter accepts the usual
+    spellings (case-insensitive) and rejects anything else.
+
+    Raises:
+        argparse.ArgumentTypeError: If the token is not a recognised boolean.
+    """
+    if isinstance(value, bool):
+        return value
+    token = str(value).strip().lower()
+    if token in ("true", "t", "yes", "y", "on", "1"):
+        return True
+    # The empty string stays falsy, as it was under ``type=bool``.
+    if token in ("false", "f", "no", "n", "off", "0", ""):
+        return False
+    raise argparse.ArgumentTypeError(
+        "expected a boolean value, got {!r}".format(value))
+def create_argparse():
+    """Build the argument parser for the quantization-aware training example.
+
+    Boolean options (``--use_gpu``, ``--use_pact``, ``--ce_test``,
+    ``--onnx_format``) take an explicit value such as ``True``/``False`` and
+    are parsed with ``_str2bool`` so that ``--use_gpu False`` really yields
+    ``False``.
+
+    Returns:
+        The configured ``argparse.ArgumentParser``.
+    """
+    parser = argparse.ArgumentParser("Quantization on ImageNet")
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=128,
+        help="Single Card Minibatch size.", )
+    parser.add_argument(
+        "--pretrained_model",
+        type=str,
+        default=None,
+        help="Whether to use pretrained model.")
+    parser.add_argument(
+        "--use_gpu",
+        type=_str2bool,
+        default=True,
+        help="Whether to use GPU or not.", )
+    parser.add_argument(
+        "--model", type=str, default="mobilenet_v1", help="The target model.")
+    parser.add_argument(
+        "--lr",
+        type=float,
+        default=0.0001,
+        help="The learning rate used to fine-tune pruned model.")
+    parser.add_argument(
+        "--lr_strategy",
+        type=str,
+        default="piecewise_decay",
+        help="The learning rate decay strategy.")
+    parser.add_argument(
+        "--l2_decay", type=float, default=3e-5, help="The l2_decay parameter.")
+    parser.add_argument(
+        "--ls_epsilon", type=float, default=0.0, help="Label smooth epsilon.")
+    parser.add_argument(
+        "--use_pact",
+        type=_str2bool,
+        default=False,
+        help="Whether to use PACT method.")
+    parser.add_argument(
+        "--ce_test", type=_str2bool, default=False, help="Whether to CE test.")
+    parser.add_argument(
+        "--onnx_format",
+        type=_str2bool,
+        default=False,
+        help="Whether to export the quantized model with format of ONNX.")
+    parser.add_argument(
+        "--momentum_rate",
+        type=float,
+        default=0.9,
+        help="The value of momentum_rate.")
+    parser.add_argument(
+        "--num_epochs",
+        type=int,
+        default=10,
+        help="The number of total epochs.")
+    parser.add_argument(
+        "--total_images",
+        type=int,
+        default=1281167,
+        help="The number of total training images.")
+    parser.add_argument(
+        "--data",
+        type=str,
+        default="imagenet",
+        help="Which data to use. 'cifar10' or 'imagenet'")
+    parser.add_argument(
+        "--log_period", type=int, default=10, help="Log period in batches.")
+    parser.add_argument(
+        "--infer_model",
+        type=str,
+        default="./infer_model/int8_infer",
+        help="inference model saved directory.")
+    parser.add_argument(
+        "--checkpoints",
+        type=str,
+        default="./checkpoints",
+        help="checkpoints directory.")
+    parser.add_argument(
+        "--step_epochs",
+        nargs="+",
+        type=int,
+        default=[10, 20, 30],
+        help="piecewise decay step")
+    return parser
--- a/example/quantization/qat/classification/train.py
+++ b/example/quantization/qat/classification/train.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import logging
+import paddle
+import time
+import random
+import numpy as np
+from paddleslim.common import get_logger
+from paddle.quantization import QuantConfig
+from paddle.quantization.quanters import FakeQuanterWithAbsMaxObserver
+from paddle.quantization.quanters.abs_max import FakeQuanterWithAbsMaxObserverLayer
+from paddleslim.quant.quanters import PACTQuanter
+from paddle.quantization import QAT
+# Make the sibling modules imported right below (optimizer, args) resolvable
+# from any working directory. ``__file__`` must be the module attribute, not
+# the string literal "__file__", whose dirname is just "".
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+from optimizer import create_optimizer
+from args import parse_args
+from args import SUPPORT_MODELS
+# Module-level logger used by the evaluation/training helpers in this file;
+# created through paddleslim's ``get_logger`` with the INFO level.
+_logger = get_logger(__name__, level=logging.INFO)
+def compress(args):
+    """Run quantization-aware training on a classifier and export the result.
+
+    Steps, in order: build the train/val datasets and loaders for
+    ``args.data``; instantiate ``SUPPORT_MODELS[args.model]``; evaluate it once
+    before quantization; wrap it with ``QAT`` (PACT activation quanter and
+    abs-max weight quanter on ``Conv2D``/``Linear``); fine-tune for
+    ``args.num_epochs`` epochs, writing a checkpoint after every epoch; finally
+    convert the model and save it to ``args.infer_model``.
+
+    Args:
+        args: Namespace produced by ``parse_args``. ``args.total_images`` is
+            overwritten with 50000 when ``args.data`` is ``"cifar10"``.
+
+    Raises:
+        ValueError: If ``args.data`` is neither ``"cifar10"`` nor ``"imagenet"``.
+    """
+    num_workers = 4
+    shuffle = True
+    if args.ce_test:
+        # CE test mode: pin every seed and drop worker processes / shuffling.
+        seed = 111
+        paddle.seed(seed)
+        np.random.seed(seed)
+        random.seed(seed)
+        num_workers = 0
+        shuffle = False
+    if args.data == "cifar10":
+        # ``T`` is not provided by the module-level imports, so bring it in
+        # here; without it this branch raised NameError.
+        import paddle.vision.transforms as T
+        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
+        train_dataset = paddle.vision.datasets.Cifar10(
+            mode="train", backend="cv2", transform=transform)
+        val_dataset = paddle.vision.datasets.Cifar10(
+            mode="test", backend="cv2", transform=transform)
+        class_dim = 10
+        image_shape = [3, 32, 32]
+        args.total_images = 50000
+    elif args.data == "imagenet":
+        import imagenet_reader as reader
+        train_dataset = reader.ImageNetDataset(mode='train')
+        val_dataset = reader.ImageNetDataset(mode='val')
+        class_dim = 1000
+        image_shape = [3, 224, 224]
+    else:
+        raise ValueError("{} is not supported.".format(args.data))
+    trainer_num = paddle.distributed.get_world_size()
+    use_data_parallel = trainer_num != 1
+    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
+    # model definition
+    if use_data_parallel:
+        paddle.distributed.init_parallel_env()
+    # Pretrained weights are requested only for the ImageNet setting.
+    pretrain = args.data == "imagenet"
+    model = SUPPORT_MODELS[args.model](
+        pretrained=pretrain, num_classes=class_dim)
+    train_batch_sampler = paddle.io.DistributedBatchSampler(
+        train_dataset,
+        batch_size=args.batch_size,
+        shuffle=shuffle,
+        drop_last=True)
+    train_loader = paddle.io.DataLoader(
+        train_dataset,
+        batch_sampler=train_batch_sampler,
+        places=place,
+        return_list=True,
+        num_workers=num_workers)
+    valid_loader = paddle.io.DataLoader(
+        val_dataset,
+        places=place,
+        batch_size=args.batch_size,
+        shuffle=False,
+        drop_last=False,
+        return_list=True,
+        num_workers=num_workers)
+    @paddle.no_grad()
+    def test(epoch, net):
+        """Evaluate ``net`` on the validation loader; return the mean of the per-batch top-1 accuracies."""
+        net.eval()
+        batch_id = 0
+        acc_top1_ns = []
+        acc_top5_ns = []
+        eval_reader_cost = 0.0
+        eval_run_cost = 0.0
+        total_samples = 0
+        reader_start = time.time()
+        for data in valid_loader():
+            eval_reader_cost += time.time() - reader_start
+            image = data[0]
+            label = data[1]
+            if args.data == "cifar10":
+                label = paddle.reshape(label, [-1, 1])
+            eval_start = time.time()
+            out = net(image)
+            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
+            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
+            eval_run_cost += time.time() - eval_start
+            total_samples += image.shape[0]
+            top1_n = np.mean(acc_top1.numpy())
+            top5_n = np.mean(acc_top5.numpy())
+            if batch_id % args.log_period == 0:
+                # The very first log line covers a single batch only.
+                log_period = 1 if batch_id == 0 else args.log_period
+                elapsed = eval_reader_cost + eval_run_cost
+                _logger.info(
+                    "Eval epoch[{}] batch[{}] - top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s".
+                    format(epoch, batch_id, top1_n, top5_n,
+                           eval_reader_cost / log_period,
+                           elapsed / log_period,
+                           total_samples / log_period,
+                           total_samples / elapsed))
+                eval_reader_cost = 0.0
+                eval_run_cost = 0.0
+                total_samples = 0
+            acc_top1_ns.append(top1_n)
+            acc_top5_ns.append(top5_n)
+            batch_id += 1
+            reader_start = time.time()
+        _logger.info(
+            "Final eval epoch[{}] - acc_top1: {:.6f}; acc_top5: {:.6f}".format(
+                epoch,
+                np.mean(np.array(acc_top1_ns)), np.mean(np.array(acc_top5_ns))))
+        return np.mean(np.array(acc_top1_ns))
+    # Baseline accuracy of the float model, logged as epoch -1.
+    test(-1, model)
+    ############################################################################################################
+    # 1. quantization
+    ############################################################################################################
+    activation_quanter = PACTQuanter(FakeQuanterWithAbsMaxObserverLayer)
+    weight_quanter = FakeQuanterWithAbsMaxObserver(moving_rate=0.9)
+    q_config = QuantConfig(activation=None, weight=None)
+    q_config.add_type_config(
+        [paddle.nn.Conv2D, paddle.nn.Linear],
+        activation=activation_quanter,
+        weight=weight_quanter)
+    quanter = QAT(config=q_config)
+    quant_model = quanter.quantize(model)
+    opt, lr = create_optimizer(quant_model, trainer_num, args)
+    # Multi-card runs must train through the DataParallel wrapper; previously
+    # the wrapper was created but never used, so training ran on the bare model.
+    # Evaluation, checkpointing and export keep using ``quant_model`` directly.
+    train_net = paddle.DataParallel(
+        quant_model) if use_data_parallel else quant_model
+    def cross_entropy(input, target, ls_epsilon):
+        """Mean cross-entropy loss, with label smoothing when ``ls_epsilon`` > 0."""
+        if ls_epsilon > 0:
+            if target.shape[-1] != class_dim:
+                target = paddle.nn.functional.one_hot(target, class_dim)
+            target = paddle.nn.functional.label_smooth(
+                target, epsilon=ls_epsilon)
+            target = paddle.reshape(target, shape=[-1, class_dim])
+            input = -paddle.nn.functional.log_softmax(input, axis=-1)
+            cost = paddle.sum(target * input, axis=-1)
+        else:
+            cost = paddle.nn.functional.cross_entropy(input=input, label=target)
+        avg_cost = paddle.mean(cost)
+        return avg_cost
+    def train(epoch, net):
+        """Run one epoch over the training loader, stepping ``opt`` and ``lr`` after every batch."""
+        net.train()
+        batch_id = 0
+        train_reader_cost = 0.0
+        train_run_cost = 0.0
+        total_samples = 0
+        reader_start = time.time()
+        for data in train_loader():
+            train_reader_cost += time.time() - reader_start
+            image = data[0]
+            label = data[1]
+            if args.data == "cifar10":
+                label = paddle.reshape(label, [-1, 1])
+            train_start = time.time()
+            out = net(image)
+            avg_cost = cross_entropy(out, label, args.ls_epsilon)
+            acc_top1 = paddle.metric.accuracy(input=out, label=label, k=1)
+            acc_top5 = paddle.metric.accuracy(input=out, label=label, k=5)
+            avg_cost.backward()
+            opt.step()
+            opt.clear_grad()
+            lr.step()
+            loss_n = np.mean(avg_cost.numpy())
+            acc_top1_n = np.mean(acc_top1.numpy())
+            acc_top5_n = np.mean(acc_top5.numpy())
+            train_run_cost += time.time() - train_start
+            total_samples += image.shape[0]
+            if batch_id % args.log_period == 0:
+                # The very first log line covers a single batch only.
+                log_period = 1 if batch_id == 0 else args.log_period
+                elapsed = train_reader_cost + train_run_cost
+                _logger.info(
+                    "epoch[{}]-batch[{}] lr: {:.6f} - loss: {:.6f}; top1: {:.6f}; top5: {:.6f}; avg_reader_cost: {:.6f} s, avg_batch_cost: {:.6f} s, avg_samples: {}, avg_ips: {:.3f} images/s".
+                    format(epoch, batch_id,
+                           lr.get_lr(), loss_n, acc_top1_n, acc_top5_n,
+                           train_reader_cost / log_period,
+                           elapsed / log_period,
+                           total_samples / log_period,
+                           total_samples / elapsed))
+                train_reader_cost = 0.0
+                train_run_cost = 0.0
+                total_samples = 0
+            batch_id += 1
+            reader_start = time.time()
+    ############################################################################################################
+    # train loop
+    ############################################################################################################
+    start_epoch = 0
+    ck_info = args.checkpoints + "/checkpoints.info"
+    # exist_ok avoids a check-then-create race when several ranks start at once.
+    os.makedirs(args.checkpoints, exist_ok=True)
+    if os.path.isfile(ck_info):
+        # checkpoints.info holds the index of the last finished epoch.
+        # NOTE(review): only model weights are checkpointed; optimizer and LR
+        # scheduler state restart from scratch on resume - confirm this is intended.
+        with open(ck_info, 'r') as f:
+            start_epoch = int(f.readline()) + 1
+        quant_model.load_dict(
+            paddle.load(f"{args.checkpoints}/{start_epoch-1}.pdparams"))
+        _logger.info(
+            f"Load checkpoint from {args.checkpoints}/{start_epoch-1}.pdparams")
+        test(start_epoch - 1, quant_model)
+    for _epoch in range(start_epoch, args.num_epochs):
+        train(_epoch, train_net)
+        test(_epoch, quant_model)
+        paddle.save(quant_model.state_dict(),
+                    f"{args.checkpoints}/{_epoch}.pdparams")
+        with open(ck_info, 'w') as f:
+            f.write(str(_epoch))
+        _logger.info(f"Save checkpoint to {args.checkpoints}/{_epoch}.pdparams")
+    infer_model = quanter.convert(quant_model)
+    # Export with the input shape of the dataset that was actually trained on
+    # (was hard-coded to 3x224x224, which does not match CIFAR-10).
+    dummy_input = paddle.static.InputSpec(
+        shape=[None] + image_shape, dtype='float32')
+    paddle.jit.save(infer_model, args.infer_model, [dummy_input])
+    _logger.info(f"Saved inference model to {args.infer_model}")
+# Script entry point: parse the command line, then run the QAT pipeline.
+if __name__ == "__main__":
+    compress(parse_args())