Unverified commit 3babb011, authored by cc, committed by GitHub

Add PTQ CE Test (#845)

* Add ce test for ptq
Parent 1026858c
1. Preparation
Install the Paddle and PaddleSlim versions to be tested.
Prepare the ImageNet dataset. Assume it is extracted to the `/dataset/ILSVRC2012` folder, which contains the `train` folder, the `val` folder, `train_list.txt`, and `val_list.txt`.
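For reference, the expected layout is sketched below (the list files map each relative image path to an integer label, which is what the data readers in this test assume):
```
/dataset/ILSVRC2012/
├── train/            # training images
├── val/              # validation images
├── train_list.txt    # each line: "<relative image path> <integer label>"
└── val_list.txt
```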
2. Produce the quantized models
In `run_train.sh`, set `data_path` to the ImageNet dataset path `/dataset/ILSVRC2012`.
Also set the GPU ids and other parameters in `run_train.sh` as needed.
Run `sh run_train.sh` to quantize several classification models with dygraph quantization-aware training; only one epoch is executed.
When it finishes, the quantized models are saved under `output_models/quant_dygraph`.
3. Convert the quantized models
To deploy a quantized model on Intel CPUs, convert it with the `src/save_quant_model.py` script.
The following example converts the `mobilenet_v1` model.
```
python src/save_quant_model.py --load_model_path output_models/quant_dygraph/mobilenet_v1 --save_model_path int8_models/mobilenet_v1
```
4. Test the quantized models
In `run_test.sh`, set `data_path` to the ImageNet dataset path `/dataset/ILSVRC2012`.
Also set the GPU ids and other parameters in `run_test.sh` as needed.
Use the `run_test.sh` script to measure the accuracy of each quantized model before and after conversion.
For example:
```
sh run_test.sh output_models/quant_dygraph/mobilenet_v1
sh run_test.sh int8_models/mobilenet_v1
```
5. Test goal
Using dygraph quantization-aware training, produce quantized `mobilenet_v1`, `mobilenet_v2`, `resnet50`, and `vgg16` models, and verify that the accuracy of each quantized model before and after conversion agrees within 1%.
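A minimal sketch of this pass criterion, assuming the top-1 accuracies have been read from the two `run_test.sh` logs (the helper below is illustrative, not part of the test scripts):
```
def within_tolerance(acc_before, acc_after, tol=0.01):
    # pass if top-1 accuracy before and after conversion differs by at most 1 point
    return abs(acc_before - acc_after) <= tol
```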
model_path=$1
test_samples=1000 # if set as -1, use all test samples
data_path='/dataset/ILSVRC2012/'
batch_size=16
echo "--------eval model: ${model_name}-------------"
python ./src/eval.py \
--model_path=$model_path \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size}
import os
import math
import random
import functools
import numpy as np
import paddle
from PIL import Image, ImageEnhance
from paddle.io import Dataset
random.seed(0)
np.random.seed(0)
DATA_DIM = 224
THREAD = 16
BUF_SIZE = 10240
DATA_DIR = './data/ILSVRC2012/'
DATA_DIR = os.path.join(os.path.split(os.path.realpath(__file__))[0], DATA_DIR)
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
def resize_short(img, target_size):
percent = float(target_size) / min(img.size[0], img.size[1])
resized_width = int(round(img.size[0] * percent))
resized_height = int(round(img.size[1] * percent))
img = img.resize((resized_width, resized_height), Image.LANCZOS)
return img
def crop_image(img, target_size, center):
width, height = img.size
size = target_size
    if center:
        # use integer offsets so the crop box has exact pixel coordinates
        w_start = (width - size) // 2
        h_start = (height - size) // 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, img.size[0] - w + 1)
j = np.random.randint(0, img.size[1] - h + 1)
img = img.crop((i, j, i + w, j + h))
img = img.resize((size, size), Image.LANCZOS)
return img
def rotate_image(img):
angle = np.random.randint(-10, 11)
img = img.rotate(angle)
return img
def distort_color(img):
def random_brightness(img, lower=0.5, upper=1.5):
e = np.random.uniform(lower, upper)
return ImageEnhance.Brightness(img).enhance(e)
def random_contrast(img, lower=0.5, upper=1.5):
e = np.random.uniform(lower, upper)
return ImageEnhance.Contrast(img).enhance(e)
def random_color(img, lower=0.5, upper=1.5):
e = np.random.uniform(lower, upper)
return ImageEnhance.Color(img).enhance(e)
ops = [random_brightness, random_contrast, random_color]
np.random.shuffle(ops)
img = ops[0](img)
img = ops[1](img)
img = ops[2](img)
return img
def process_image(sample, mode, color_jitter, rotate):
img_path = sample[0]
    try:
        img = Image.open(img_path)
    except Exception:
        print(img_path, "cannot be opened!")
        return None
if mode == 'train':
if rotate: img = rotate_image(img)
img = random_crop(img, DATA_DIM)
else:
img = resize_short(img, target_size=256)
img = crop_image(img, target_size=DATA_DIM, center=True)
if mode == 'train':
if color_jitter:
img = distort_color(img)
if np.random.randint(0, 2) == 1:
img = img.transpose(Image.FLIP_LEFT_RIGHT)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
img -= img_mean
img /= img_std
if mode == 'train' or mode == 'val':
return img, sample[1]
elif mode == 'test':
return [img]
def _reader_creator(file_list,
mode,
shuffle=False,
color_jitter=False,
rotate=False,
data_dir=DATA_DIR,
batch_size=1):
def reader():
try:
with open(file_list) as flist:
full_lines = [line.strip() for line in flist]
if shuffle:
np.random.shuffle(full_lines)
            if mode == 'train' and os.getenv('PADDLE_TRAINING_ROLE'):
                # distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
per_node_lines = len(full_lines) // trainer_count
lines = full_lines[trainer_id * per_node_lines:(
trainer_id + 1) * per_node_lines]
print(
"read images from %d, length: %d, lines length: %d, total: %d"
% (trainer_id * per_node_lines, per_node_lines,
len(lines), len(full_lines)))
else:
lines = full_lines
for line in lines:
if mode == 'train' or mode == 'val':
img_path, label = line.split()
img_path = os.path.join(data_dir, img_path)
yield img_path, int(label)
elif mode == 'test':
img_path = os.path.join(data_dir, line)
yield [img_path]
except Exception as e:
print("Reader failed!\n{}".format(str(e)))
os._exit(1)
mapper = functools.partial(
process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
def train(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'train_list.txt')
return _reader_creator(
file_list,
'train',
shuffle=True,
color_jitter=False,
rotate=False,
data_dir=data_dir)
def val(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'val_list.txt')
return _reader_creator(file_list, 'val', shuffle=False, data_dir=data_dir)
def test(data_dir=DATA_DIR):
file_list = os.path.join(data_dir, 'test_list.txt')
return _reader_creator(file_list, 'test', shuffle=False, data_dir=data_dir)
class ImageNetDataset(Dataset):
def __init__(self, data_dir=DATA_DIR, mode='train'):
super(ImageNetDataset, self).__init__()
self._data_dir = data_dir
train_file_list = os.path.join(data_dir, 'train_list.txt')
val_file_list = os.path.join(data_dir, 'val_list.txt')
test_file_list = os.path.join(data_dir, 'test_list.txt')
self.mode = mode
if mode == 'train':
with open(train_file_list) as flist:
full_lines = [line.strip() for line in flist]
np.random.shuffle(full_lines)
            if os.getenv('PADDLE_TRAINING_ROLE'):
                # distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
per_node_lines = len(full_lines) // trainer_count
lines = full_lines[trainer_id * per_node_lines:(
trainer_id + 1) * per_node_lines]
print(
"read images from %d, length: %d, lines length: %d, total: %d"
% (trainer_id * per_node_lines, per_node_lines,
len(lines), len(full_lines)))
else:
lines = full_lines
self.data = [line.split() for line in lines]
else:
with open(val_file_list) as flist:
lines = [line.strip() for line in flist]
self.data = [line.split() for line in lines]
def __getitem__(self, index):
sample = self.data[index]
data_path = os.path.join(self._data_dir, sample[0])
if self.mode == 'train':
data, label = process_image(
[data_path, sample[1]],
mode='train',
color_jitter=False,
rotate=False)
        elif self.mode == 'val':
data, label = process_image(
[data_path, sample[1]],
mode='val',
color_jitter=False,
rotate=False)
return data, np.array([label]).astype('int64')
def __len__(self):
return len(self.data)
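# Usage sketch (illustrative, assuming the ImageNet layout described in the README):
#   train_reader = train(data_dir='/dataset/ILSVRC2012')
#   val_dataset = ImageNetDataset(data_dir='/dataset/ILSVRC2012', mode='val')
#   loader = paddle.io.DataLoader(val_dataset, batch_size=32, num_workers=2)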
1. Preparation
Install the Paddle and PaddleSlim versions to be tested.
Prepare the ImageNet dataset; it must satisfy the requirements of paddle hapi. Assume it is extracted to the `/dataset/ILSVRC2012` folder, which contains the `train` folder, the `val_hapi` folder, `train_list.txt`, and `val_list.txt`. If there is a problem with the dataset, contact the author directly.
Specify the GPUs to use via `export CUDA_VISIBLE_DEVICES=xx`.
2. Produce the PTQ quantized models
In `run_ptq.sh`, set the data path, e.g. `data_path="/dataset/ILSVRC2012"`.
Run `sh run_ptq.sh` to quantize the mobilenet_v1, mobilenet_v2, resnet50, and vgg16 models with dygraph post-training quantization (PTQ).
When it finishes, the quantized models are saved under the `output_ptq` directory.
For example, `output_ptq/mobilenet_v1/fp32_infer` is the original FP32 model and `output_ptq/mobilenet_v1/int8_infer` is the PTQ quantized model.
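The resulting layout is sketched below (model files follow the `model.pdmodel`/`model.pdiparams` naming used by the conversion and test scripts):
```
output_ptq/
└── mobilenet_v1/
    ├── fp32_infer/    # original FP32 inference model (model.pdmodel, model.pdiparams)
    └── int8_infer/    # PTQ INT8 inference model
```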
3. Produce the QAT quantized models
In `run_qat.sh`, set `data_path` to the ImageNet dataset path `/dataset/ILSVRC2012`.
Run `sh run_qat.sh` to quantize several classification models with dygraph quantization-aware training (QAT); only one epoch is executed.
When it finishes, the quantized models are saved under the `output_qat` directory.
For example, `output_qat/mobilenet_v1` is a QAT quantized model.
4. Convert the quantized models
To deploy quantized models on X86 CPUs, convert them with the `src/save_quant_model.py` script.
The following example converts the `output_qat/mobilenet_v1` model.
```
sh run_convert.sh output_qat/mobilenet_v1 int8_qat_models/mobilenet_v1
```
Following this example, convert all QAT- and PTQ-produced quantized models; assume they are saved in the `int8_qat_models` and `int8_ptq_models` directories, respectively.
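As a sketch, the following loop converts all eight models, assuming the PTQ INT8 models live under `output_ptq/<arch>/int8_infer` as described in step 2:
```
for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
    sh run_convert.sh output_qat/${model} int8_qat_models/${model}
    sh run_convert.sh output_ptq/${model}/int8_infer int8_ptq_models/${model}
done
```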
5. Test the models
In `run_test.sh`, set `data_path` to the ImageNet dataset path `/dataset/ILSVRC2012`.
Use the `run_test.sh` script to test the accuracy of the original FP32 models (4 in total); the GPU may be enabled. For example:
```
sh run_test.sh output_ptq/mobilenet_v1/fp32_infer True
```
Use the `run_test.sh` script to test the accuracy of the PTQ and QAT quantized models before conversion (4 of each); the GPU may be enabled. For example:
```
sh run_test.sh output_qat/mobilenet_v1 True
```
Use the `run_test.sh` script to test the accuracy of the converted PTQ and QAT quantized models (4 of each); the GPU must not be enabled. For example:
```
sh run_test.sh int8_qat_models/mobilenet_v1 False
```
6. Test goal
Using the dygraph quantization features, produce quantized `mobilenet_v1`, `mobilenet_v2`, `resnet50`, and `vgg16` models, and verify that the accuracy of each quantized model before and after conversion agrees within 1%.
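For completeness, a sketch that runs every accuracy test in sequence (paths and GPU flags as described in step 5; the `fp32_infer` and `int8_ptq_models` paths follow the conventions assumed above):
```
for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
    sh run_test.sh output_ptq/${model}/fp32_infer True    # FP32 baseline
    sh run_test.sh output_ptq/${model}/int8_infer True    # PTQ, before conversion
    sh run_test.sh output_qat/${model} True               # QAT, before conversion
    sh run_test.sh int8_ptq_models/${model} False         # PTQ, after conversion (CPU only)
    sh run_test.sh int8_qat_models/${model} False         # QAT, after conversion (CPU only)
done
```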
load_model=$1
save_model=$2
python src/save_quant_model.py \
--load_model_path ${load_model} \
--save_model_path ${save_model}
data_path="/dataset/ILSVRC2012"
quant_batch_num=10
quant_batch_size=10
for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
echo "--------quantize model: ${model}-------------"
python ./src/ptq.py \
--data=${data_path} \
--arch=${model} \
--quant_batch_num=${quant_batch_num} \
--quant_batch_size=${quant_batch_size} \
--output_dir="output_ptq"
done
echo "\n"
export CUDA_VISIBLE_DEVICES=5
data_path="/dataset/ILSVRC2012"
val_dir="val_hapi"
epoch=1
lr=0.0001
batch_size=32
num_workers=3
output_dir=$PWD/output_qat
for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--val_dir=${val_dir} \
--epoch=${epoch} \
--batch_size=${batch_size} \
--num_workers=${num_workers} \
@@ -20,3 +20,5 @@ do
--enable_quant
#--use_pact
done
echo "\n"
data_path='/dataset/ILSVRC2012/'
model_path=$1
use_gpu=$2
ir_optim=False
echo "--------test model: ${model_path}-------------"
python ./src/test.py \
--model_path=${model_path} \
--data_dir=${data_path} \
--test_samples=-1 \
--batch_size=32 \
--use_gpu=${use_gpu} \
--ir_optim=${ir_optim}
echo "\n"
@@ -21,6 +21,7 @@ def eval(args):
params_file = os.path.join(args.model_path, args.params_filename)
config = paddle_infer.Config(model_file, params_file)
config.enable_mkldnn()
config.switch_ir_optim(False)
predictor = paddle_infer.create_predictor(config)
@@ -82,8 +83,8 @@ def main():
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('model_path', str, "", "The inference model path.")
add_arg('model_filename', str, "int8_infer.pdmodel", "model filename")
add_arg('params_filename', str, "int8_infer.pdiparams", "params filename")
add_arg('model_filename', str, "model.pdmodel", "model filename")
add_arg('params_filename', str, "model.pdiparams", "params filename")
add_arg('data_dir', str, "/dataset/ILSVRC2012/",
"The ImageNet dataset root dir.")
add_arg('test_samples', int, -1,
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from PIL import Image
from paddle.vision.datasets import DatasetFolder
from paddle.vision.transforms import transforms
class ImageNetDataset(DatasetFolder):
def __init__(self,
path,
mode='train',
image_size=224,
resize_short_size=256):
super(ImageNetDataset, self).__init__(path)
self.mode = mode
normalize = transforms.Normalize(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
if self.mode == 'train':
self.transform = transforms.Compose([
transforms.RandomResizedCrop(image_size),
transforms.RandomHorizontalFlip(), transforms.Transpose(),
normalize
])
else:
self.transform = transforms.Compose([
transforms.Resize(resize_short_size),
transforms.CenterCrop(image_size), transforms.Transpose(),
normalize
])
def __getitem__(self, idx):
img_path, label = self.samples[idx]
img = Image.open(img_path).convert('RGB')
label = np.array([label]).astype(np.int64)
return self.transform(img), label
def __len__(self):
return len(self.samples)
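# Usage sketch (illustrative path; DatasetFolder expects one subdirectory per class):
#   val_dataset = ImageNetDataset('/dataset/ILSVRC2012/val_hapi', mode='val')
#   image, label = val_dataset[0]  # CHW float image and int64 label array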
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import argparse
import contextlib
import os
import time
import math
import numpy as np
import paddle
import paddle.vision.models as models
from imagenet_dataset import ImageNetDataset
from paddleslim import PTQ
def calibrate(model, dataset, batch_num, batch_size):
data_loader = paddle.io.DataLoader(
dataset, batch_size=batch_size, num_workers=5)
acc_list = []
for idx, data in enumerate(data_loader()):
img = data[0]
label = data[1]
out = model(img)
acc = paddle.metric.accuracy(out, label)
acc_list.append(acc.numpy())
if (idx + 1) % 50 == 0:
print("idx:" + str(idx))
if (batch_num > 0) and (idx + 1 >= batch_num):
break
return np.mean(acc_list)
def main():
# 1 load model
model_list = [x for x in models.__dict__["__all__"]]
assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
model_list, FLAGS.arch)
fp32_model = models.__dict__[FLAGS.arch](pretrained=True)
fp32_model.eval()
val_dataset = ImageNetDataset(
os.path.join(FLAGS.data, FLAGS.val_dir), mode='val')
    # 2 quantize the model
ptq = PTQ()
quant_model = ptq.quantize(fp32_model)
print("Calibrate")
calibrate(quant_model, val_dataset, FLAGS.quant_batch_num,
FLAGS.quant_batch_size)
# 3 save
quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "int8_infer",
"model")
input_spec = paddle.static.InputSpec(
shape=[None, 3, 224, 224], dtype='float32')
ptq.save_quantized_model(quant_model, quant_output_dir, [input_spec])
fp32_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "fp32_infer",
"model")
paddle.jit.save(fp32_model, fp32_output_dir, [input_spec])
if __name__ == '__main__':
parser = argparse.ArgumentParser("Quantization on ImageNet")
# model
parser.add_argument(
"--arch", type=str, default='mobilenet_v2', help="model name")
parser.add_argument(
"--output_dir", type=str, default='output', help="save dir")
# data
    parser.add_argument(
        '--data',
        default="/dataset/ILSVRC2012",
        help='path to dataset (should have subdirectories named "train" and "val")'
    )
parser.add_argument(
'--val_dir',
default="val_hapi",
help='the dir that saves val images for paddle.Model')
# train
parser.add_argument(
"--quant_batch_num", default=10, type=int, help="batch num for quant")
parser.add_argument(
"--quant_batch_size", default=10, type=int, help="batch size for quant")
FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path"
main()
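# Example invocation (mirrors run_ptq.sh; values are the defaults above):
#   python src/ptq.py --data=/dataset/ILSVRC2012 --arch=mobilenet_v1 \
#       --quant_batch_num=10 --quant_batch_size=10 --output_dir=output_ptq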
@@ -16,73 +16,120 @@ from __future__ import division
from __future__ import print_function
import argparse
import contextlib
import os
import time
import math
import numpy as np
import paddle
from paddle.distributed import ParallelEnv
from paddle.optimizer.lr import PiecewiseDecay
from paddle.metric.metrics import Accuracy
import paddle.vision.models as models
from paddleslim import QAT
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware
from imagenet_dataset import ImageNetDataset
def make_optimizer(step_per_epoch, parameter_list=None):
assert FLAGS.lr_scheduler == 'piecewise'
base_lr = FLAGS.lr
lr_scheduler = FLAGS.lr_scheduler
momentum = FLAGS.momentum
weight_decay = FLAGS.weight_decay
milestones = FLAGS.milestones
boundaries = [step_per_epoch * e for e in milestones]
values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
learning_rate = PiecewiseDecay(boundaries=boundaries, values=values)
optimizer = paddle.optimizer.Momentum(
learning_rate=learning_rate,
momentum=momentum,
weight_decay=weight_decay,
parameters=parameter_list)
return optimizer
def main():
# create model
model_list = [x for x in models.__dict__["__all__"]]
assert FLAGS.arch in model_list, "Expected FLAGS.arch in {}, but received {}".format(
assert FLAGS.arch in model_list, \
"Expected FLAGS.arch in {}, but received {}".format(
model_list, FLAGS.arch)
model = models.__dict__[FLAGS.arch](pretrained=not FLAGS.resume)
# quantize model
if FLAGS.enable_quant:
print("quantize model")
if not FLAGS.use_naive_api:
print("use slim api")
quant_config = {
'weight_quantize_type': FLAGS.weight_quantize_type,
}
dygraph_qat = QAT(quant_config)
else:
print("use navie api")
dygraph_qat = ImperativeQuantAware(
weight_quantize_type=FLAGS.weight_quantize_type, )
dygraph_qat.quantize(model)
# prepare
model = paddle.Model(model)
if FLAGS.resume is not None:
print("Resume from " + FLAGS.resume)
model.load(FLAGS.resume)
train_dataset = ImageNetDataset(
os.path.join(FLAGS.data, 'train'), mode='train')
val_dataset = ImageNetDataset(
os.path.join(FLAGS.data, FLAGS.val_dir), mode='val')
optim = make_optimizer(
np.ceil(
float(len(train_dataset)) / FLAGS.batch_size /
ParallelEnv().nranks),
parameter_list=model.parameters())
model.prepare(optim, paddle.nn.CrossEntropyLoss(), Accuracy(topk=(1, 5)))
# test
if FLAGS.eval_only:
model.evaluate(
val_dataset,
batch_size=FLAGS.batch_size,
num_workers=FLAGS.num_workers)
return
# train
output_dir = os.path.join(FLAGS.output_dir, "checkpoint",
FLAGS.arch + "_checkpoint",
time.strftime('%Y-%m-%d-%H-%M', time.localtime()))
if not os.path.exists(output_dir):
os.makedirs(output_dir)
model.fit(train_dataset,
val_dataset,
batch_size=FLAGS.batch_size,
epochs=FLAGS.epoch,
save_dir=output_dir,
num_workers=FLAGS.num_workers)
# save
if FLAGS.enable_quant:
quant_output_dir = os.path.join(FLAGS.output_dir, "quant_dygraph",
FLAGS.arch, "int8_infer")
quant_output_dir = os.path.join(FLAGS.output_dir, FLAGS.arch, "model")
input_spec = paddle.static.InputSpec(
shape=[None, 3, 224, 224], dtype='float32')
dygraph_qat.save_quantized_model(model.network, quant_output_dir,
[input_spec])
print("Save quantized inference model in " + quant_output_dir)
print("save all checkpoints in " + output_dir)
print("save quantized inference model in " + quant_output_dir)
if __name__ == '__main__':
@@ -90,32 +137,63 @@ if __name__ == '__main__':
# model
parser.add_argument(
"--arch", type=str, default='mobilenet_v1', help="model arch")
"--arch", type=str, default='mobilenet_v2', help="model name")
parser.add_argument(
"--resume", default=None, type=str, help="checkpoint path to resume")
parser.add_argument(
"--eval_only", action='store_true', help="only evaluate the model")
parser.add_argument(
"--output_dir", type=str, default='output', help="output dir")
"--output_dir", type=str, default='output', help="save dir")
# data
parser.add_argument(
'--data',
metavar='DIR',
default="",
        help='path to dataset '
        '(should have subdirectories named "train" and "val")')
parser.add_argument(
'--val_dir',
default="val_hapi",
help='the dir that saves val images for paddle.Model')
    # train
    parser.add_argument(
        "-e", "--epoch", default=1, type=int, help="number of epoch")
    parser.add_argument(
        "-b", "--batch_size", default=10, type=int, help="batch size")
    parser.add_argument(
        "-n", "--num_workers", default=2, type=int, help="dataloader workers")
parser.add_argument(
'--lr',
default=0.0001,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"--lr-scheduler",
default='piecewise',
type=str,
help="learning rate scheduler")
parser.add_argument(
"--milestones",
nargs='+',
type=int,
default=[1, 2, 3, 4, 5],
help="piecewise decay milestones")
parser.add_argument(
"--weight-decay", default=1e-4, type=float, help="weight decay")
parser.add_argument("--momentum", default=0.9, type=float, help="momentum")
# quant
parser.add_argument(
"--enable_quant", action='store_true', help="enable quant model")
parser.add_argument("--use_pact", action='store_true', help="use pact")
parser.add_argument(
"--use_naive_api", action='store_true', help="use the navie api")
parser.add_argument(
"--weight_quantize_type", type=str, default='abs_max', help="")
FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path"
main()
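# Example invocation (mirrors run_qat.sh):
#   python src/qat.py --arch=mobilenet_v1 --data=/dataset/ILSVRC2012 \
#       --val_dir=val_hapi --epoch=1 --batch_size=32 --num_workers=3 --enable_quant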
@@ -66,8 +66,8 @@ def transform_and_save_int8_model(original_path, save_path):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_scope = fluid.executor.global_scope()
model_filename = 'model.pdmodel'
params_filename = 'model.pdiparams'
with fluid.scope_guard(inference_scope):
if os.path.exists(os.path.join(original_path, '__model__')):
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
import time
import sys
import argparse
import functools
import math
import paddle
import paddle.inference as paddle_infer
from utility import add_arguments, print_arguments
import imagenet_dataset as dataset
def eval(args):
# create predictor
model_file = os.path.join(args.model_path, args.model_filename)
params_file = os.path.join(args.model_path, args.params_filename)
config = paddle_infer.Config(model_file, params_file)
if args.use_gpu:
config.enable_use_gpu(1000, 0)
if not args.ir_optim:
config.switch_ir_optim(False)
predictor = paddle_infer.create_predictor(config)
input_names = predictor.get_input_names()
input_handle = predictor.get_input_handle(input_names[0])
output_names = predictor.get_output_names()
output_handle = predictor.get_output_handle(output_names[0])
# prepare data
val_dataset = dataset.ImageNetDataset(
path=os.path.join(args.data_dir, 'val_hapi'), mode='val')
eval_loader = paddle.io.DataLoader(
val_dataset, batch_size=args.batch_size, num_workers=5)
cost_time = 0.
total_num = 0.
correct_1_num = 0
correct_5_num = 0
for batch_id, data in enumerate(eval_loader()):
# set input
img_np = np.array([tensor.numpy() for tensor in data[0]])
label_np = np.array([tensor.numpy() for tensor in data[1]])
input_handle.reshape(img_np.shape)
input_handle.copy_from_cpu(img_np)
# run
t1 = time.time()
predictor.run()
t2 = time.time()
cost_time += (t2 - t1)
output_data = output_handle.copy_to_cpu()
# calculate accuracy
for i in range(len(label_np)):
label = label_np[i][0]
result = output_data[i, :]
index = result.argsort()
total_num += 1
if index[-1] == label:
correct_1_num += 1
if label in index[-5:]:
correct_5_num += 1
if batch_id % 10 == 0:
acc1 = correct_1_num / total_num
acc5 = correct_5_num / total_num
avg_time = cost_time / total_num
print(
"batch_id {}, acc1 {:.3f}, acc5 {:.3f}, avg time {:.5f} sec/img".
format(batch_id, acc1, acc5, avg_time))
if args.test_samples > 0 and \
(batch_id + 1)* args.batch_size >= args.test_samples:
break
acc1 = correct_1_num / total_num
acc5 = correct_5_num / total_num
avg_time = cost_time / total_num
print("End test: test image {}".format(total_num))
print("test_acc1 {:.4f}, test_acc5 {:.4f}, avg time {:.5f} sec/img".format(
acc1, acc5, avg_time))
print("\n")
def main():
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('model_path', str, "", "The inference model path.")
add_arg('model_filename', str, "model.pdmodel", "model filename")
add_arg('params_filename', str, "model.pdiparams", "params filename")
add_arg('data_dir', str, "/dataset/ILSVRC2012/",
"The ImageNet dataset root dir.")
add_arg('test_samples', int, -1,
"Test samples. If set -1, use all test samples")
add_arg('batch_size', int, 10, "Batch size.")
add_arg('use_gpu', bool, False, "Use gpu.")
add_arg('ir_optim', bool, False, "Enable ir optim.")
args = parser.parse_args()
print_arguments(args)
eval(args)
if __name__ == '__main__':
main()
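# Example invocation (mirrors run_test.sh; keep use_gpu=False for converted INT8 models):
#   python src/test.py --model_path=output_qat/mobilenet_v1 \
#       --data_dir=/dataset/ILSVRC2012 --test_samples=-1 --batch_size=32 \
#       --use_gpu=True --ir_optim=False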
@@ -20,6 +20,7 @@ import os
import numpy as np
import six
import logging
from distutils.util import strtobool
def print_arguments(args):
@@ -54,7 +55,7 @@ def add_arguments(argname, type, default, help, argparser, **kwargs):
add_argument("name", str, "Jonh", "User name.", parser)
args = parser.parse_args()
"""
type = strtobool if type == bool else type
argparser.add_argument(
"--" + argname,
default=default,