add ptq demo and ptq api docs (#894)

e56acbf0 · Guanghua Yu · GitHub · 4ec35251 · e56acbf0 · e56acbf0
9 changed file
--- a/demo/dygraph/post_quant/README.md
+++ b/demo/dygraph/post_quant/README.md
+# 动态图离线量化
+
+本示例介绍如何对动态图模型进行离线量化，示例以常用的MobileNetV1和MobileNetV3模型为例，介绍如何对其进行离线量化。
+
+
+## 分类模型的离线量化流程
+
+#### 准备数据
+
+在当前目录下创建``dataset``文件夹，将``ImageNet``的验证集解压在``dataset``文件夹下，解压后``dataset/ILSVRC2012``文件夹下应包含以下文件：
+- ``'val'``文件夹，验证图片
+- ``'val_list.txt'``文件
+
+#### 准备需要离线量化的模型
+
+- 对于paddle vision支持的[模型](https://github.com/PaddlePaddle/Paddle/tree/develop/python/paddle/vision/models)：`[lenet, mobilenetv1, mobilenetv2, resnet, vgg]`可以直接使用vision内置的模型定义和ImageNet预训练权重
+- 对于paddle vision暂未支持的模型，例如mobilenetv3，需要自行定义好模型结构以及准备相应的预训练权重
+  - 本示例使用的mobilenetv3模型，在ImageNet数据集上Top1精度达到75.0: [预训练权重下载](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams)
+
+
+#### 初始化离线量化接口
+
+- 保持默认离线量化配置
+```python
+ptq = PTQ()
+```
+
+- 如mobilenetv3等模型，需修改默认离线量化配置，按照如下方式修改：
+```python
+ptq_config = {'activation_quantizer': 'HistQuantizer', 'upsample_bins': 127, 'hist_percent': 0.999}
+ptq = PTQ(**ptq_config)
+```
+
+
+#### 得到离线量化模型
+
+通过ptq.quantize接口即可得到离线量化后的模型结构
+
+```python
+quant_model = ptq.quantize(fp32_model)
+```
+
+如果需要对模型进行fuse融合，设置`fuse=True`即可，fuse_list默认是None，表示对网络所有层进行fuse优化，如果需要自定义fuse某些层，可根据如下方式增加`fuse_list`，fuse后的模型更小，推理可能更快，精度持平或可能降低。
+```python
+count = 0
+fuse_list = []
+for name, layer in fp32_model.named_sublayers():
+    if isinstance(layer, nn.Conv2D):
+        fuse_list.append([name])
+    if isinstance(layer, nn.BatchNorm2D):
+        fuse_list[count].append(name)
+        count += 1
+quant_model = ptq.quantize(fp32_model, fuse=True, fuse_list=fuse_list)
+```
+
+#### 校准模型
+
+```python
+calibrate(quant_model, val_dataset, FLAGS.quant_batch_num,
+              FLAGS.quant_batch_size)
+```
+
+## 启动命令
+
+- MobileNetV1
+
+   ```bash
+  python3.7 ptq.py \
+        --data=dataset/ILSVRC2012/ \
+        --model=mobilenet_v1 \
+        --quant_batch_num=10 \
+        --quant_batch_size=10 \
+        --output_dir="output_ptq"
+   ```
+- MobileNetV3
+
+  对于MobileNetV3，直接使用默认离线量化配置进行校准，精度损失较大，为降低量化损失，在代码中默认设置了`skip_se_quant=True`，将`SE模块`跳过量化，并且调整batch_size和激活量化方式，启动命令如下：
+
+  ```bash
+  python3.7 ptq.py \
+        --data=dataset/ILSVRC2012/ \
+        --model=mobilenet_v3 \
+        --pretrain_weight=MobileNetV3_large_x1_0_pretrained.pdparams \
+        --quant_batch_num=10 \
+        --quant_batch_size=32 \
+        --output_dir="output_ptq"
+  ```
+
+## 评估精度
+
+```bash
+python3.7 eval.py --model_path=output_ptq/mobilenet_v3/int8_infer/ --data_dir=dataset/ILSVRC2012/ --use_gpu=True
+```
+
+- 评估时支持CPU，并且不依赖TensorRT，MKLDNN。
+
+
+## 量化结果
+
+| 模型        | FP32模型准确率（Top1/Top5） | 量化方法     | 量化模型准确率（Top1/Top5） |
+| ----------- | --------------------------- | ------------ | --------------------------- |
+| MobileNetV1 | 70.82/89.63                 | 离线量化 | 70.49/89.41                 |
+| MobileNetV3 | 74.98/92.13                 | 离线量化 | 71.14/90.17               |
--- a/demo/dygraph/post_quant/eval.py
+++ b/demo/dygraph/post_quant/eval.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import numpy as np
+import time
+import sys
+import argparse
+import math
+
+import paddle
+import paddle.inference as paddle_infer
+from ptq import ImageNetValDataset
+
+
+def eval():
+    """Run the exported inference model over the validation set and print accuracy.
+
+    Configuration is read from the module-level ``FLAGS`` namespace
+    (``model_path``, ``model_filename``, ``params_filename``, ``use_gpu``,
+    ``ir_optim``, ``data_dir``, ``batch_size``, ``test_samples``).
+
+    Top-1/Top-5 accuracy and the average ``predictor.run()`` time per image
+    are printed every 10 batches and once more at the end. Nothing is
+    returned.
+
+    NOTE(review): the name shadows the builtin ``eval``; it is kept because
+    the script entry point calls it by this name.
+    """
+    # create predictor
+    model_file = os.path.join(FLAGS.model_path, FLAGS.model_filename)
+    params_file = os.path.join(FLAGS.model_path, FLAGS.params_filename)
+    config = paddle_infer.Config(model_file, params_file)
+    if FLAGS.use_gpu:
+        # presumably (initial GPU memory pool in MB, device id) -- confirm
+        # against the Paddle Inference Config API.
+        config.enable_use_gpu(1000, 0)
+    if not FLAGS.ir_optim:
+        config.switch_ir_optim(False)
+
+    predictor = paddle_infer.create_predictor(config)
+
+    # Only the first input and the first output of the model are used.
+    input_names = predictor.get_input_names()
+    input_handle = predictor.get_input_handle(input_names[0])
+    output_names = predictor.get_output_names()
+    output_handle = predictor.get_output_handle(output_names[0])
+
+    # prepare data
+    val_dataset = ImageNetValDataset(FLAGS.data_dir)
+    eval_loader = paddle.io.DataLoader(
+        val_dataset, batch_size=FLAGS.batch_size, num_workers=5)
+
+    cost_time = 0.
+    total_num = 0.
+    correct_1_num = 0
+    correct_5_num = 0
+    for batch_id, data in enumerate(eval_loader()):
+        # set input
+        img_np = np.array([tensor.numpy() for tensor in data[0]])
+        label_np = np.array([tensor.numpy() for tensor in data[1]])
+
+        input_handle.reshape(img_np.shape)
+        input_handle.copy_from_cpu(img_np)
+
+        # run (only the predictor call itself is timed)
+        t1 = time.time()
+        predictor.run()
+        t2 = time.time()
+        cost_time += (t2 - t1)
+
+        output_data = output_handle.copy_to_cpu()
+
+        # calculate accuracy
+        for i in range(len(label_np)):
+            label = label_np[i][0]
+            result = output_data[i, :]
+            # argsort is ascending, so the best classes sit at the end.
+            index = result.argsort()
+            total_num += 1
+            if index[-1] == label:
+                correct_1_num += 1
+            if label in index[-5:]:
+                correct_5_num += 1
+
+        if batch_id % 10 == 0:
+            acc1 = correct_1_num / total_num
+            acc5 = correct_5_num / total_num
+            avg_time = cost_time / total_num
+            print(
+                "batch_id {}, acc1 {:.3f}, acc5 {:.3f}, avg time {:.5f} sec/img".
+                format(batch_id, acc1, acc5, avg_time))
+
+        # Optional early stop once at least `test_samples` images were seen.
+        if FLAGS.test_samples > 0 and \
+            (batch_id + 1)* FLAGS.batch_size >= FLAGS.test_samples:
+            break
+
+    if total_num == 0:
+        # The loader produced no samples (e.g. an empty val_list.txt);
+        # report it instead of failing with ZeroDivisionError below.
+        print("End test: test image {}".format(total_num))
+        print("No samples were evaluated.")
+        return
+
+    acc1 = correct_1_num / total_num
+    acc5 = correct_5_num / total_num
+    avg_time = cost_time / total_num
+    print("End test: test image {}".format(total_num))
+    print("test_acc1 {:.4f}, test_acc5 {:.4f}, avg time {:.5f} sec/img".format(
+        acc1, acc5, avg_time))
+    print("\n")
+
+
+if __name__ == '__main__':
+
+    def _str2bool(value):
+        """Parse a command-line boolean such as ``True``/``false``/``1``/``0``.
+
+        ``type=bool`` must not be used with argparse: ``bool("False")`` is
+        ``True`` because any non-empty string is truthy, so ``--use_gpu=False``
+        would silently enable the GPU.
+        """
+        if isinstance(value, bool):
+            return value
+        text = value.strip().lower()
+        if text in ('true', 't', 'yes', 'y', '1'):
+            return True
+        # The empty string stays False, as it was with ``type=bool``.
+        if text in ('false', 'f', 'no', 'n', '0', ''):
+            return False
+        raise argparse.ArgumentTypeError(
+            "Expected a boolean value, but received {!r}".format(value))
+
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        '--model_path', type=str, default="", help="The inference model path.")
+    parser.add_argument(
+        '--model_filename',
+        type=str,
+        default="model.pdmodel",
+        help="model filename")
+    parser.add_argument(
+        '--params_filename',
+        type=str,
+        default="model.pdiparams",
+        help="params filename")
+    parser.add_argument(
+        '--data_dir',
+        type=str,
+        default="dataset/ILSVRC2012/",
+        help="The ImageNet dataset root dir.")
+    parser.add_argument(
+        '--test_samples',
+        type=int,
+        default=-1,
+        help="Test samples. If set -1, use all test samples")
+    parser.add_argument(
+        '--batch_size', type=int, default=10, help="Batch size.")
+    parser.add_argument(
+        '--use_gpu',
+        type=_str2bool,
+        default=False,
+        help=" Whether use gpu or not.")
+    parser.add_argument(
+        '--ir_optim', type=_str2bool, default=False, help="Enable ir optim.")
+
+    # eval() reads its configuration from this module-level namespace.
+    FLAGS = parser.parse_args()
+
+    eval()
--- a/demo/dygraph/post_quant/ptq.py
+++ b/demo/dygraph/post_quant/ptq.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import contextlib
+import os
+import time
+import math
+import numpy as np
+from PIL import Image
+
+import paddle
+from paddle.io import Dataset
+from paddle.vision.transforms import transforms
+import paddle.vision.models as models
+import paddle.nn as nn
+
+from paddleslim import PTQ
+
+import sys
+sys.path.append(os.path.dirname("__file__"))
+sys.path.append(
+    os.path.join(os.path.dirname("__file__"), os.path.pardir, os.path.pardir))
+from models.dygraph.mobilenet_v3 import MobileNetV3_large_x1_0
+
+
+class ImageNetValDataset(Dataset):
+    """ImageNet validation samples listed in ``<data_dir>/val_list.txt``.
+
+    Each non-blank line of the list file is split on whitespace into an image
+    path (relative to ``data_dir``) and an integer label.
+
+    Args:
+        data_dir(str): Dataset root that contains ``val_list.txt``.
+        image_size(int): Side length of the center crop. Default: 224.
+        resize_short_size(int): Size passed to ``transforms.Resize`` before
+            cropping. Default: 256.
+    """
+
+    def __init__(self, data_dir, image_size=224, resize_short_size=256):
+        super(ImageNetValDataset, self).__init__()
+        val_file_list = os.path.join(data_dir, 'val_list.txt')
+        self.data_dir = data_dir
+
+        # Mean/std are given on the 0-255 pixel scale.
+        normalize = transforms.Normalize(
+            mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
+        self.transform = transforms.Compose([
+            transforms.Resize(resize_short_size),
+            transforms.CenterCrop(image_size), transforms.Transpose(), normalize
+        ])
+
+        with open(val_file_list) as flist:
+            # Skip blank lines (e.g. a trailing newline): they would split
+            # into [] and break the two-value unpacking in __getitem__.
+            self.data = [line.split() for line in flist if line.strip()]
+
+    def __getitem__(self, index):
+        """Return ``(transformed_image, label)``; label is an int64 array of shape (1,)."""
+        img_path, label = self.data[index]
+        img_path = os.path.join(self.data_dir, img_path)
+        img = Image.open(img_path).convert('RGB')
+        label = np.array([label]).astype(np.int64)
+        return self.transform(img), label
+
+    def __len__(self):
+        """Return the number of listed samples."""
+        return len(self.data)
+
+
+def calibrate(model, dataset, batch_num, batch_size):
+    """Run forward passes of ``model`` over ``dataset`` for calibration.
+
+    Args:
+        model: Callable model; it is invoked with the image batch only.
+        dataset: Dataset whose samples are ``(image, label)``.
+        batch_num(int): Stop after this many batches; a value <= 0 means the
+            whole dataset is consumed.
+        batch_size(int): Batch size of the data loader.
+
+    NOTE(review): presumably the PTQ-wrapped model records activation
+    statistics during these forward passes -- confirm against paddleslim.PTQ.
+    """
+    data_loader = paddle.io.DataLoader(
+        dataset, batch_size=batch_size, num_workers=5)
+
+    for idx, data in enumerate(data_loader()):
+        # Labels (data[1]) and the model output are not needed here.
+        model(data[0])
+
+        if (idx + 1) % 50 == 0:
+            print("idx:" + str(idx))
+        if (batch_num > 0) and (idx + 1 >= batch_num):
+            break
+
+
+def _build_fuse_list(model):
+    """Return ``[conv_name, bn_name]`` pairs for Conv2D layers followed by a BatchNorm2D.
+
+    Layers are visited in ``named_sublayers()`` order. Each BatchNorm2D is
+    paired with the most recent Conv2D that has not been paired yet; a Conv2D
+    with no BatchNorm2D (e.g. the convs inside an SE module) is left out, so
+    it can no longer shift the pairing of later layers.
+
+    NOTE(review): this relies on every BatchNorm2D being registered right
+    after the Conv2D it normalizes -- confirm for custom model definitions.
+    """
+    fuse_list = []
+    pending_conv = None
+    for name, layer in model.named_sublayers():
+        if isinstance(layer, nn.Conv2D):
+            pending_conv = name
+        elif isinstance(layer, nn.BatchNorm2D) and pending_conv is not None:
+            fuse_list.append([pending_conv, name])
+            pending_conv = None
+    return fuse_list
+
+
+def main():
+    """Quantize a model offline, calibrate it and export inference models.
+
+    Configuration is read from the module-level ``FLAGS`` namespace. Both the
+    quantized model and the original FP32 model are saved under
+    ``<output_dir>/<model>/``.
+    """
+    # 1 load model
+    model_list = [x for x in models.__dict__["__all__"]]
+    model_list.append('mobilenet_v3')
+    assert FLAGS.model in model_list, "Expected FLAGS.model in {}, but received {}".format(
+        model_list, FLAGS.model)
+    if FLAGS.model == 'mobilenet_v3':
+        # SE modules are marked skip_quant to limit the accuracy drop.
+        fp32_model = MobileNetV3_large_x1_0(skip_se_quant=True)
+    else:
+        fp32_model = models.__dict__[FLAGS.model](pretrained=True)
+    if FLAGS.pretrain_weight:
+        info_dict = paddle.load(FLAGS.pretrain_weight)
+        fp32_model.load_dict(info_dict)
+        print('Finish loading model weights:{}'.format(FLAGS.pretrain_weight))
+    fp32_model.eval()
+    for name, layer in fp32_model.named_sublayers():
+        print(name, layer)
+    fuse_list = _build_fuse_list(fp32_model)
+    if FLAGS.model == 'resnet50':
+        # None is passed through to ptq.quantize instead of an explicit list.
+        fuse_list = None
+    val_dataset = ImageNetValDataset(FLAGS.data)
+
+    # 2 quantizations
+    if FLAGS.model == 'mobilenet_v3':
+        ptq_config = {
+            'activation_quantizer': 'HistQuantizer',
+            'upsample_bins': 127,
+            'hist_percent': 0.999
+        }
+        ptq = PTQ(**ptq_config)
+    else:
+        ptq = PTQ()
+    quant_model = ptq.quantize(fp32_model, fuse=FLAGS.fuse, fuse_list=fuse_list)
+
+    print("Start calibrate...")
+    calibrate(quant_model, val_dataset, FLAGS.quant_batch_num,
+              FLAGS.quant_batch_size)
+
+    # 3 save
+    quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.model, "int8_infer",
+                                    "model")
+    input_spec = paddle.static.InputSpec(
+        shape=[None, 3, 224, 224], dtype='float32')
+    ptq.save_quantized_model(quant_model, quant_output_dir, [input_spec])
+
+    fp32_output_dir = os.path.join(FLAGS.output_dir, FLAGS.model, "fp32_infer",
+                                   "model")
+    paddle.jit.save(fp32_model, fp32_output_dir, [input_spec])
+
+
+if __name__ == '__main__':
+
+    def _str2bool(value):
+        """Parse a command-line boolean such as ``True``/``false``/``1``/``0``.
+
+        ``type=bool`` must not be used with argparse: ``bool("False")`` is
+        ``True`` because any non-empty string is truthy, so ``--fuse=False``
+        would silently turn fusing on.
+        """
+        if isinstance(value, bool):
+            return value
+        text = value.strip().lower()
+        if text in ('true', 't', 'yes', 'y', '1'):
+            return True
+        # The empty string stays False, as it was with ``type=bool``.
+        if text in ('false', 'f', 'no', 'n', '0', ''):
+            return False
+        raise argparse.ArgumentTypeError(
+            "Expected a boolean value, but received {!r}".format(value))
+
+    parser = argparse.ArgumentParser("Quantization on ImageNet")
+
+    # model
+    parser.add_argument(
+        "--model", type=str, default='mobilenet_v3', help="model name")
+    parser.add_argument(
+        "--pretrain_weight",
+        type=str,
+        default=None,
+        help="pretrain weight path")
+    parser.add_argument(
+        "--output_dir", type=str, default='output', help="save dir")
+    parser.add_argument(
+        "--fuse", type=_str2bool, default=False, help="fuse layers")
+
+    # data
+    parser.add_argument(
+        '--data',
+        default="/dataset/ILSVRC2012",
+        help='path to dataset (should have subdirectories named "train" and "val"'
+    )
+    parser.add_argument(
+        '--val_dir',
+        default="val",
+        help='the dir that saves val images for paddle.Model')
+
+    # calibration
+    parser.add_argument(
+        "--quant_batch_num", default=10, type=int, help="batch num for quant")
+    parser.add_argument(
+        "--quant_batch_size", default=10, type=int, help="batch size for quant")
+
+    # main() reads its configuration from this module-level namespace.
+    FLAGS = parser.parse_args()
+    assert FLAGS.data, "error: must provide data path"
+
+    main()
--- a/demo/dygraph/quant/train.py
+++ b/demo/dygraph/quant/train.py
@@ -33,11 +33,11 @@ from paddleslim.common import get_logger
 from paddleslim.dygraph.quant import QAT

 sys.path.append(os.path.join(os.path.dirname("__file__")))
-from mobilenet_v3 import MobileNetV3_large_x1_0
 from optimizer import create_optimizer
 sys.path.append(
    os.path.join(os.path.dirname("__file__"), os.path.pardir, os.path.pardir))
 from utility import add_arguments, print_arguments
+from models.dygraph.mobilenet_v3 import MobileNetV3_large_x1_0

 _logger = get_logger(__name__, level=logging.INFO)


--- a/demo/models/__init__.py
+++ b/demo/models/__init__.py
@@ -6,13 +6,14 @@ from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2
 from .pvanet import PVANet
 from .slimfacenet import SlimFaceNet_A_x0_60, SlimFaceNet_B_x0_75, SlimFaceNet_C_x0_75
 from .mobilenet_v3 import *
+from .dygraph import *
 __all__ = [
    "model_list", "MobileNet", "ResNet34", "ResNet50", "MobileNetV2", "PVANet",
    "ResNet50_vd", "ResNet101_vd", "MobileNetV2_x0_25"
 ]
 model_list = [
-    'MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2', 'PVANet',
-    'ResNet50_vd', "ResNet101_vd", "MobileNetV2_x0_25"
+    'MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2', 'PVANet', 'ResNet50_vd',
+    "ResNet101_vd", "MobileNetV2_x0_25"
 ]

 __all__ += mobilenet_v3.__all__

--- a/demo/models/dygraph/__init__.py
+++ b/demo/models/dygraph/__init__.py
+from __future__ import absolute_import
--- a/demo/dygraph/quant/mobilenet_v3.py
+++ b/demo/dygraph/quant/mobilenet_v3.py
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -50,8 +50,10 @@ class MobileNetV3(nn.Layer):
                 scale=1.0,
                 model_name="small",
                 dropout_prob=0.2,
-                 class_dim=1000):
+                 class_dim=1000,
+                 skip_se_quant=False):
        super(MobileNetV3, self).__init__()
+        self.skip_se_quant = skip_se_quant

        inplanes = 16
        if model_name == "large":
@@ -121,6 +123,7 @@ class MobileNetV3(nn.Layer):
                    stride=s,
                    use_se=se,
                    act=nl,
+                    skip_se_quant=self.skip_se_quant,
                    name="conv" + str(i + 2)))
            self.block_list.append(block)
            inplanes = make_divisible(scale * c)
@@ -229,6 +232,7 @@ class ResidualUnit(nn.Layer):
                 stride,
                 use_se,
                 act=None,
+                 skip_se_quant=False,
                 name=''):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_c == out_c
@@ -254,7 +258,8 @@ class ResidualUnit(nn.Layer):
            act=act,
            name=name + "_depthwise")
        if self.if_se:
-            self.mid_se = SEModule(mid_c, name=name + "_se")
+            self.mid_se = SEModule(
+                mid_c, skip_se_quant=skip_se_quant, name=name + "_se")
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
@@ -277,7 +282,7 @@ class ResidualUnit(nn.Layer):


 class SEModule(nn.Layer):
-    def __init__(self, channel, reduction=4, name=""):
+    def __init__(self, channel, reduction=4, skip_se_quant=False, name=""):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
@@ -296,6 +301,9 @@ class SEModule(nn.Layer):
            padding=0,
            weight_attr=ParamAttr(name + "_2_weights"),
            bias_attr=ParamAttr(name=name + "_2_offset"))
+        if skip_se_quant:
+            self.conv1.skip_quant = True
+            self.conv2.skip_quant = True

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)

--- a/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
+++ b/docs/zh_cn/api_cn/dygraph/quanter/qat.rst
 QAT
 ==================

+模型量化包含三种量化方法，分别是动态离线量化方法、静态离线量化方法和量化训练方法。
+
+下图展示了如何选择模型量化方法。
+
+
+.. image:: https://user-images.githubusercontent.com/52520497/83991261-cbe55800-a97e-11ea-880c-d83fb7924454.png
+   :scale: 80 %
+   :alt: 图1：选择模型量化方法
+   :align: center
+
+下图综合对比了模型量化方法的使用条件、易用性、精度损失和预期收益。
+
+.. image:: https://user-images.githubusercontent.com/52520497/83991268-cee04880-a97e-11ea-9ecd-2d0f04a15205.png
+   :scale: 80 %
+   :alt: 图2：综合对比模型量化方法
+   :align: center
+
+量化训练
+-------------------
+
 .. py:class:: paddleslim.QAT(config=None, weight_preprocess=None, act_preprocess=None, weight_quantize=None, act_quantize=None)

-`源代码 <https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/dygraph/quant/quanter.py>`_
+`源代码 <https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/dygraph/quant/qat.py>`_

 使用量化训练方法（Quant Aware Training, QAT）得到模拟量化模型，在需要量化的算子前插入模拟量化节点，为其激活和权重输入提前执行`量化-反量化`逻辑。

@@ -96,7 +116,7 @@ QAT

   将指定的动态图量化模型导出为静态图预测模型，用于预测部署。
   
-   量化预测模型可以使用`netron`软件打开，进行可视化查看。该量化预测模型和普通FP32预测模型一样，可以使用PaddleLite和PaddleInference加载预测，具体请参考`推理部署`章节。
+   量化预测模型可以使用`VisualDL`软件打开，进行可视化查看。该量化预测模型和普通FP32预测模型一样，可以使用PaddleLite和PaddleInference加载预测，具体请参考`推理部署`章节。
   
   **参数：**
   
@@ -131,3 +151,120 @@ QAT

   ..

+
+静态离线量化
+-------------------
+
+.. py:class:: paddleslim.PTQ(activation_quantizer='KLQuantizer', weight_quantizer='PerChannelAbsmaxQuantizer', **kwargs)
+
+`源代码 <https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/dygraph/quant/ptq.py>`_
+
+**参数：**
+
+- **activation_quantizer(str, Optional)** - 激活量化方式。 可选择`KLQuantizer`、`HistQuantizer`和`AbsmaxQuantizer`，默认为`KLQuantizer`。
+
+- **weight_quantizer(str, Optional)** - 权重量化方式。 可选择`AbsmaxQuantizer`和`PerChannelAbsmaxQuantizer`，默认为`PerChannelAbsmaxQuantizer`。
+
+**返回：** 离线量化器实例。
+
+**KLQuantizer参数：**
+
+- **quant_bits(int)** - 量化比特数，默认是8。
+- **bins(int)** - 指定统计的区间个数，默认是1024。
+- **upsample_bins(int)** - 上采样统计的区间个数，默认是64。
+
+**HistQuantizer参数：**
+
+- **quant_bits(int)** - 量化比特数，默认是8。
+- **bins(int)** - 指定统计的区间个数，默认是1024。
+- **upsample_bins(int)** - 上采样统计的区间个数，默认是64。
+- **hist_percent(float)** - 采样百分比，默认是0.99999。
+
+**AbsmaxQuantizer参数：**
+
+- **quant_bits(int)** - 量化比特数，默认是8。
+
+
+**示例代码：**
+
+.. code-block:: python
+
+   from paddleslim import PTQ
+   ptq = PTQ()
+..
+
+如果想要更改离线量化默认配置，可以给PTQ()传入dict，例如下面所示：
+
+.. code-block:: python
+
+   from paddleslim import PTQ
+   ptq_config = {'activation_quantizer': 'HistQuantizer', 'upsample_bins': 127, 'hist_percent': 0.999}
+   ptq = PTQ(**ptq_config)
+..
+
+    .. py:method:: quantize(model, fuse=False, fuse_list=None)
+
+    对模型进行离线量化的处理，插入量化-反量化节点。
+
+    **参数：**
+
+    - **model(paddle.nn.Layer)** - 一个paddle Layer的实例，需要包含支持量化的算子，如：`Conv, Linear`。
+    - **fuse(bool)** - 是否对模型进行fuse融合，默认是False。
+    - **fuse_list(list)** - 如果对模型进行fuse融合，需要在fuse_list中添加需要fuse的层，默认是None。
+
+    **示例代码：**
+
+    .. code-block:: python
+
+        from paddleslim import PTQ
+        from paddle.vision.models import mobilenet_v1
+        fp32_model = mobilenet_v1(pretrained=True)
+        ptq = PTQ()
+        quant_model = ptq.quantize(fp32_model)
+    ..
+
+    如果需要对模型进行fuse融合，可根据如下方式增加`fuse_list`，目前支持`Conv2D`和`BatchNorm2D`的融合，fuse后的模型更小，推理可能更快，精度持平或可能降低。
+
+    .. code-block:: python
+
+        import paddle.nn as nn
+
+        count = 0
+        fuse_list = []
+        for name, layer in fp32_model.named_sublayers():
+            if isinstance(layer, nn.Conv2D):
+                fuse_list.append([name])
+            if isinstance(layer, nn.BatchNorm2D):
+                fuse_list[count].append(name)
+                count += 1
+        quant_model = ptq.quantize(fp32_model, fuse=True, fuse_list=fuse_list)
+    ..
+
+    .. py:method:: save_quantized_model(model, path, input_spec=None)
+
+    将指定的动态图量化模型导出为静态图预测模型，用于预测部署。
+ 
+    量化预测模型可以使用`VisualDL`软件打开，进行可视化查看。该量化预测模型和普通FP32预测模型一样，可以使用PaddleLite和PaddleInference加载预测，具体请参考`推理部署`章节。
+    
+    **参数：**
+    
+    - **model(paddle.nn.Layer)** - 离线量化校准结束后，需要导出的量化模型，该模型由`quantize`接口产出。
+    
+    - **path(str)** - 导出的量化预测模型保存的路径，导出后在该路径下可以找到`model`和`params`文件。
+    
+    - **input_spec(list[InputSpec|Tensor], Optional)** - 描述存储模型forward方法的输入，可以通过InputSpec或者示例Tensor进行描述。如果为 None ，所有原 Layer forward方法的输入变量将都会被配置为存储模型的输入变量。默认为 None。
+    
+    
+    **示例：**
+    
+
+    .. code-block:: python
+
+        from paddleslim import PTQ
+        from paddle.vision.models import mobilenet_v1
+        fp32_model = mobilenet_v1(pretrained=True)
+        ptq = PTQ()
+        quant_model = ptq.quantize(fp32_model)
+
+        ptq.save_quantized_model(
+            quant_model,
+            './quant_model',
+            input_spec=[paddle.static.InputSpec(shape=[None, 3, 224, 224], dtype='float32')])
+    ..
+
+动态离线量化
+-------------------
+
+动态离线量化接口请参考`quant_post_dynamic`API。
--- a/paddleslim/dygraph/quant/ptq.py
+++ b/paddleslim/dygraph/quant/ptq.py
@@ -41,15 +41,21 @@ class PTQ(object):
    """

    def __init__(self,
-                 activation_quantizer=Q.KLQuantizer(),
-                 weight_quantizer=Q.PerChannelAbsmaxQuantizer()):
+                 activation_quantizer='KLQuantizer',
+                 weight_quantizer='PerChannelAbsmaxQuantizer',
+                 **kwargs):
        """
        Args:
            activation_quantizer(Quantizer): The quantizer method for activation.
+                Can be set to `KLQuantizer`/`HistQuantizer`/`AbsmaxQuantizer`.
                Default: KLQuantizer.
            weight_quantizer(Quantizer): The quantizer method for weight.
+                Can be set to `AbsmaxQuantizer`/`PerChannelAbsmaxQuantizer`.
                Default: PerChannelAbsmaxQuantizer.
        """
+        print("activation_quantizer", activation_quantizer)
+        activation_quantizer = eval(activation_quantizer)(**kwargs)
+        weight_quantizer = eval(weight_quantizer)()
        assert isinstance(activation_quantizer, tuple(Q.SUPPORT_ACT_QUANTIZERS))
        assert isinstance(weight_quantizer, tuple(Q.SUPPORT_WT_QUANTIZERS))