## Introduction

Complex models improve accuracy but also introduce redundancy. Model quantization removes this redundancy by converting full-precision parameters to fixed-point numbers, which reduces computational complexity and speeds up inference.
With essentially no loss in accuracy, quantization converts FP32 model parameters to Int8, shrinking the model and accelerating computation, so quantized models have a clear speed advantage when deployed on mobile and similar targets.
This tutorial shows how to compress PaddleClas models with PaddleSlim, the PaddlePaddle model compression library.
[PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim) integrates widely used, state-of-the-art compression techniques such as pruning, quantization (both quantization-aware training and post-training quantization), knowledge distillation, and neural architecture search; it is worth a look if you are interested.
Before starting, it is recommended to be familiar with [how PaddleClas models are trained](../../../docs/zh_CN/tutorials/quick_start.md) and with [PaddleSlim](https://paddleslim.readthedocs.io/zh_CN/latest/index.html).
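To make the FP32-to-Int8 conversion concrete, here is a minimal NumPy sketch of `abs_max`-style quantization (the strategy family named in the quantization config later in this document); the tensor values are invented for illustration:

```python
import numpy as np

# Toy weight tensor; real tensors are typically quantized per channel.
w = np.array([0.82, -1.34, 0.05, 0.61], dtype=np.float32)

# abs_max: map the largest-magnitude value onto the int8 range [-127, 127].
scale = np.abs(w).max() / 127.0
w_int8 = np.clip(np.round(w / scale), -127, 127).astype(np.int8)

# Dequantized values are what the inference engine effectively computes with.
w_dequant = w_int8.astype(np.float32) * scale

print(w_int8)     # [  78 -127    5   58]
print(w_dequant)  # close to w, up to quantization error
```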
## Quick Start

Quantization is mostly used for deploying lightweight models on mobile devices. Once a model is trained, quantization can compress it further and speed up inference.
Model quantization involves five main steps:

1. Install PaddleSlim
2. Prepare a trained model
3. Run quantization-aware training
4. Export the quantized inference model
5. Deploy the quantized model
### 1. Install PaddleSlim

* PaddleSlim can be installed with pip:
```bash
pip3.7 install paddleslim==2.0.0
```
* To get the latest PaddleSlim features, install from source:
```bash
git clone https://github.com/PaddlePaddle/PaddleSlim.git
cd PaddleSlim
python3.7 setup.py install
```
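A quick way to verify the installation (simply checking that the import succeeds):

```bash
python3.7 -c "import paddleslim"
```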
### 2. Prepare a Trained Model

PaddleClas provides a series of trained [models](../../../docs/zh_CN/models/models_intro.md). If the model to be quantized is not among them, train one following the [regular training](../../../docs/zh_CN/tutorials/getting_started.md) procedure.
### 3. Quantization-Aware Training

Quantization training comes in two flavors, post-training (offline) quantization and quantization-aware (online) training; the latter gives better accuracy. It requires loading a pretrained model, after which the model can be quantized once the quantization strategy is defined.
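Under the hood, the training script wraps the model with PaddleSlim's dygraph QAT API (the full source of `quant.py` appears later in this document). A minimal sketch of the flow, using a stand-in model from `paddle.vision` for illustration:

```python
import paddle
from paddleslim.dygraph.quant import QAT

# Config keys mirror get_default_quant_config() in pact_helper.py below.
quant_config = {
    'weight_quantize_type': 'channel_wise_abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8,
    'activation_bits': 8,
    'quantizable_layer_type': ['Conv2D', 'Linear'],
}

net = paddle.vision.models.mobilenet_v2()  # stand-in model for illustration
quanter = QAT(config=quant_config)
quanter.quantize(net)  # inserts fake-quant ops into the model in place
# ...train net as usual; afterwards, export with quanter.save_quantized_model(...)
```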
The quantization training code is located in `deploy/slim/quant/quant.py`; the launch commands are as follows:
* Launch on CPU, or on a single machine with a single GPU:
```bash
python3.7 deploy/slim/quant/quant.py \
-c configs/MobileNetV3/MobileNetV3_large_x1_0.yaml \
-o pretrained_model="./MobileNetV3_large_x1_0_pretrained"
```
* Launch with single-machine single-GPU, single-machine multi-GPU, or multi-machine multi-GPU:
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python3.7 -m paddle.distributed.launch \
--gpus="0,1,2,3,4,5,6,7" \
deploy/slim/quant/quant.py \
-c configs/MobileNetV3/MobileNetV3_large_x1_0.yaml \
-o pretrained_model="./MobileNetV3_large_x1_0_pretrained"
```
* Below is an example script for quantization-aware training of the `MobileNetV3_large_x1_0` model.
```bash
# Download the pretrained model
wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams
# Launch training. If GPU memory limits keep the batch size small, scale the batch size and learning rate down by the same factor.
python3.7 -m paddle.distributed.launch \
--gpus="0,1,2,3,4,5,6,7" \
deploy/slim/quant/quant.py \
-c configs/MobileNetV3/MobileNetV3_large_x1_0.yaml \
-o pretrained_model="./MobileNetV3_large_x1_0_pretrained" \
-o LEARNING_RATE.params.lr=0.13 \
-o epochs=100
```
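If the batch size does have to be reduced, the linear scaling rule applies: halving the batch size halves the learning rate. The `TRAIN.batch_size` key below is an assumption based on typical PaddleClas configs of this era; check your YAML for the exact name:

```bash
# Hypothetical override: batch size halved relative to the config default,
# so the learning rate is halved as well (0.13 -> 0.065).
python3.7 -m paddle.distributed.launch \
    --gpus="0,1,2,3,4,5,6,7" \
    deploy/slim/quant/quant.py \
    -c configs/MobileNetV3/MobileNetV3_large_x1_0.yaml \
    -o pretrained_model="./MobileNetV3_large_x1_0_pretrained" \
    -o TRAIN.batch_size=128 \
    -o LEARNING_RATE.params.lr=0.065 \
    -o epochs=100
```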
To run quantization training for a recognition model, simply change the config file and the loaded model parameters.
### 4. Export the Model

After quantization-aware training has saved a model, export it as an inference model for deployment:
```bash
python3.7 deploy/slim/quant/export_model.py \
-m MobileNetV3_large_x1_0 \
-p output/MobileNetV3_large_x1_0/best_model/ppcls \
-o ./MobileNetV3_large_x1_0_infer/ \
--img_size=224 \
--class_dim=1000
```
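If the export succeeds, the output directory should contain the inference model files produced by `paddle.jit.save` (file names follow the `inference` prefix used in `export_model.py`):

```bash
ls ./MobileNetV3_large_x1_0_infer/
# expected: inference.pdmodel  inference.pdiparams  inference.pdiparams.info
```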
### 5. Deploy the Quantized Model

The quantized model exported above still stores its parameters as FP32, but their values are constrained to the int8 range. The exported model can be converted with Paddle-Lite's `opt` model conversion tool.
For deployment of quantized models, see [Mobile deployment](../../lite/readme.md).
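As an illustration, a typical `opt` invocation might look like the following (assuming Paddle-Lite is installed via pip, which provides the `paddle_lite_opt` command; see the deployment guide above for the authoritative steps):

```bash
pip3.7 install paddlelite
paddle_lite_opt \
    --model_file=./MobileNetV3_large_x1_0_infer/inference.pdmodel \
    --param_file=./MobileNetV3_large_x1_0_infer/inference.pdiparams \
    --optimize_out=./MobileNetV3_large_x1_0_lite \
    --valid_targets=arm
```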
## Suggested Hyperparameters for Quantization-Aware Training

* For quantization-aware training, load the pretrained model obtained from regular training; this speeds up convergence.
* Set the initial learning rate to `1/20~1/10` of the regular-training value, and the number of epochs to `1/5~1/2` of regular training (the example script above uses lr=0.13 and epochs=100 in this spirit). Other settings are best left unchanged.
The full source of `deploy/slim/quant/export_model.py`, the export script used in step 4:

```python
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
sys.path.append(
os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')))
from ppcls.modeling import architectures
from ppcls.utils.save_load import load_dygraph_pretrain
import paddle
import paddle.nn.functional as F
from paddle.jit import to_static
from paddleslim.dygraph.quant import QAT
from pact_helper import get_default_quant_config


def parse_args():
def str2bool(v):
return v.lower() in ("true", "t", "1")
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", type=str)
parser.add_argument("-p", "--pretrained_model", type=str)
parser.add_argument("-o", "--output_path", type=str, default="./inference")
parser.add_argument("--class_dim", type=int, default=1000)
parser.add_argument("--load_static_weights", type=str2bool, default=False)
parser.add_argument("--img_size", type=int, default=224)
return parser.parse_args()
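

# Wraps the backbone and appends softmax so the exported inference model
# outputs class probabilities; GoogLeNet returns a tuple, so only its first
# element is used.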
class Net(paddle.nn.Layer):
def __init__(self, net, class_dim, model=None):
super(Net, self).__init__()
self.pre_net = net(class_dim=class_dim)
self.model = model
def forward(self, inputs):
x = self.pre_net(inputs)
if self.model == "GoogLeNet":
x = x[0]
x = F.softmax(x)
return x


def main():
args = parse_args()
net = architectures.__dict__[args.model]
model = Net(net, args.class_dim, args.model)
# get QAT model
quant_config = get_default_quant_config()
# TODO(littletomatodonkey): add PACT for export model
# quant_config["activation_preprocess_type"] = "PACT"
quanter = QAT(config=quant_config)
quanter.quantize(model)
load_dygraph_pretrain(
model.pre_net,
path=args.pretrained_model,
load_static_weights=args.load_static_weights)
model.eval()
save_path = os.path.join(args.output_path, "inference")
quanter.save_quantized_model(
model,
save_path,
input_spec=[
paddle.static.InputSpec(
shape=[None, 3, args.img_size, args.img_size], dtype='float32')
])
print('inference QAT model is saved to {}'.format(save_path))


if __name__ == "__main__":
main()
```

The full source of `deploy/slim/quant/pact_helper.py`, which builds the default quantization config shared by both scripts:

```python
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle


def get_default_quant_config():
quant_config = {
# weight preprocess type, default is None and no preprocessing is performed.
'weight_preprocess_type': None,
# activation preprocess type, default is None and no preprocessing is performed.
'activation_preprocess_type': None,
# weight quantize type, default is 'channel_wise_abs_max'
'weight_quantize_type': 'channel_wise_abs_max',
# activation quantize type, default is 'moving_average_abs_max'
'activation_quantize_type': 'moving_average_abs_max',
# weight quantize bit num, default is 8
'weight_bits': 8,
# activation quantize bit num, default is 8
'activation_bits': 8,
# data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
'dtype': 'int8',
# window size for 'range_abs_max' quantization. default is 10000
'window_size': 10000,
# The decay coefficient of moving average, default is 0.9
'moving_rate': 0.9,
# for dygraph quantization, layers of type in quantizable_layer_type will be quantized
'quantizable_layer_type': ['Conv2D', 'Linear'],
}
return quant_config
```

The full source of `deploy/slim/quant/quant.py`, the quantization-aware training entry point from step 3:

```python
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..', '..', '..')))
sys.path.append(
os.path.abspath(os.path.join(__dir__, '..', '..', '..', 'tools')))
import paddle
from paddleslim.dygraph.quant import QAT
from ppcls.data import Reader
from ppcls.utils.config import get_config
from ppcls.utils.save_load import init_model, save_model
from ppcls.utils import logger
import program
from pact_helper import get_default_quant_config


def parse_args():
parser = argparse.ArgumentParser("PaddleClas train script")
parser.add_argument(
'-c',
'--config',
type=str,
default='configs/ResNet/ResNet50.yaml',
help='config file path')
parser.add_argument(
'-o',
'--override',
action='append',
default=[],
help='config options to be overridden')
args = parser.parse_args()
return args


def main(args):
paddle.seed(12345)
config = get_config(args.config, overrides=args.override, show=True)
# assign the place
use_gpu = config.get("use_gpu", True)
place = paddle.set_device('gpu' if use_gpu else 'cpu')
trainer_num = paddle.distributed.get_world_size()
use_data_parallel = trainer_num != 1
config["use_data_parallel"] = use_data_parallel
if config["use_data_parallel"]:
paddle.distributed.init_parallel_env()
net = program.create_model(config.ARCHITECTURE, config.classes_num)
# prepare to quant
quant_config = get_default_quant_config()
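    # PACT (PArameterized Clipping acTivation) learns a clipping threshold for
    # activations during training, which typically improves quantized accuracy.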
quant_config["activation_preprocess_type"] = "PACT"
quanter = QAT(config=quant_config)
quanter.quantize(net)
optimizer, lr_scheduler = program.create_optimizer(
config, parameter_list=net.parameters())
init_model(config, net, optimizer)
if config["use_data_parallel"]:
net = paddle.DataParallel(net)
train_dataloader = Reader(config, 'train', places=place)()
if config.validate:
valid_dataloader = Reader(config, 'valid', places=place)()
last_epoch_id = config.get("last_epoch", -1)
best_top1_acc = 0.0 # best top1 acc record
best_top1_epoch = last_epoch_id
for epoch_id in range(last_epoch_id + 1, config.epochs):
net.train()
# 1. train with train dataset
program.run(train_dataloader, config, net, optimizer, lr_scheduler,
epoch_id, 'train')
# 2. validate with validate dataset
if config.validate and epoch_id % config.valid_interval == 0:
net.eval()
with paddle.no_grad():
top1_acc = program.run(valid_dataloader, config, net, None,
None, epoch_id, 'valid')
if top1_acc > best_top1_acc:
best_top1_acc = top1_acc
best_top1_epoch = epoch_id
model_path = os.path.join(config.model_save_dir,
config.ARCHITECTURE["name"])
save_model(net, optimizer, model_path, "best_model")
message = "The best top1 acc {:.5f}, in epoch: {:d}".format(
best_top1_acc, best_top1_epoch)
logger.info(message)
# 3. save the persistable model
if epoch_id % config.save_interval == 0:
model_path = os.path.join(config.model_save_dir,
config.ARCHITECTURE["name"])
save_model(net, optimizer, model_path, epoch_id)


if __name__ == '__main__':
args = parse_args()
main(args)
```

The commit also adjusts `init_model` in `ppcls/utils/save_load.py` (imported by `quant.py` above) so that checkpoint state is only restored when an optimizer is passed in, and rewords two log messages. The relevant hunks (surrounding context is collapsed in the original view):

```diff
@@ -105,7 +105,7 @@ def init_model(config, net, optimizer=None):
     load model from checkpoint or pretrained_model
     """
     checkpoints = config.get('checkpoints')
-    if checkpoints:
+    if checkpoints and optimizer is not None:
         assert os.path.exists(checkpoints + ".pdparams"), \
             "Given dir {}.pdparams not exist.".format(checkpoints)
         assert os.path.exists(checkpoints + ".pdopt"), \
@@ -114,7 +114,7 @@ def init_model(config, net, optimizer=None):
         opti_dict = paddle.load(checkpoints + ".pdopt")
         net.set_dict(para_dict)
         optimizer.set_state_dict(opti_dict)
-        logger.info("Finish initing model from {}".format(checkpoints))
+        logger.info("Finish load checkpoints from {}".format(checkpoints))
         return
     pretrained_model = config.get('pretrained_model')
@@ -129,7 +129,7 @@ def init_model(config, net, optimizer=None):
         path=pretrained_model,
         load_static_weights=load_static_weights)
     logger.info(
-        logger.coloring("Finish initing model from {}".format(
+        logger.coloring("Finish load pretrained model from {}".format(
             pretrained_model), "HEADER"))
```