该示例内容为对[MobileNet-SSD](https://github.com/PaddlePaddle/models/tree/v1.4/PaddleCV/object_detection)的主干网络进行int8量化训练。

## 0. 目录

[1. 定义工具函数](#1.-%E5%AE%9A%E4%B9%89%E5%B7%A5%E5%85%B7%E5%87%BD%E6%95%B0)

[2. 数据准备](#2.-%E6%95%B0%E6%8D%AE%E5%87%86%E5%A4%87)

[3. 定义网络](#3.-%E5%AE%9A%E4%B9%89%E7%BD%91%E7%BB%9C)

[4. 配置压缩任务](#4.-%E9%85%8D%E7%BD%AE%E5%8E%8B%E7%BC%A9%E4%BB%BB%E5%8A%A1)

## 1. 定义工具函数

在[image_util.py](https://github.com/PaddlePaddle/models/blob/v1.4/PaddleCV/object_detection/image_util.py)中定义了图像数据预处理使用到的工具函数。

In [67]:
from PIL import Image, ImageEnhance, ImageDraw
from PIL import ImageFile
import numpy as np
import random
import math

ImageFile.LOAD_TRUNCATED_IMAGES = True  #otherwise IOError raised image file is truncated


class sampler():
    """Plain record holding the parameters of one random-crop sampling rule.

    The scale and aspect-ratio ranges are read by ``generate_sample``, the
    sample/trial budgets by ``generate_batch_samples`` and the Jaccard
    bounds by ``satisfy_sample_constraint`` (where a bound of 0 disables
    that side of the check).
    """

    def __init__(self, max_sample, max_trial, min_scale, max_scale,
                 min_aspect_ratio, max_aspect_ratio, min_jaccard_overlap,
                 max_jaccard_overlap):
        # Budget: accepted crops wanted, and attempts allowed to get them.
        self.max_sample, self.max_trial = max_sample, max_trial
        # Range the crop scale is drawn from.
        self.min_scale, self.max_scale = min_scale, max_scale
        # Range the crop aspect ratio is drawn from.
        self.min_aspect_ratio, self.max_aspect_ratio = (min_aspect_ratio,
                                                        max_aspect_ratio)
        # Accepted overlap window against ground-truth boxes.
        self.min_jaccard_overlap, self.max_jaccard_overlap = (
            min_jaccard_overlap, max_jaccard_overlap)


class bbox():
    """Mutable axis-aligned box described by its two corner coordinates."""

    def __init__(self, xmin, ymin, xmax, ymax):
        # Corner with the smaller coordinates.
        self.xmin, self.ymin = xmin, ymin
        # Corner with the larger coordinates.
        self.xmax, self.ymax = xmax, ymax


def bbox_area(src_bbox):
    """Return width times height of ``src_bbox`` (an object with
    ``xmin``/``ymin``/``xmax``/``ymax`` attributes)."""
    return (src_bbox.xmax - src_bbox.xmin) * (src_bbox.ymax - src_bbox.ymin)


def generate_sample(sampler):
    """Draw one random crop box from the ranges stored in ``sampler``.

    A scale and an aspect ratio are drawn uniformly from the sampler's
    ranges; the ratio is then clamped to ``[scale**2, 1 / scale**2]``.
    The box position is drawn uniformly between 0 and ``1 - size`` on each
    axis.

    Returns:
        A ``bbox`` with the sampled corners.
    """
    scale = np.random.uniform(sampler.min_scale, sampler.max_scale)
    ratio = np.random.uniform(sampler.min_aspect_ratio,
                              sampler.max_aspect_ratio)
    # Clamp from below first, then from above, like the two-step original.
    ratio = min(max(ratio, scale**2.0), 1 / (scale**2.0))

    ratio_root = ratio**0.5
    box_w = scale * ratio_root
    box_h = scale / ratio_root

    # Horizontal offset is drawn before the vertical one.
    left = np.random.uniform(0, 1 - box_w)
    top = np.random.uniform(0, 1 - box_h)
    return bbox(left, top, left + box_w, top + box_h)


def jaccard_overlap(sample_bbox, object_bbox):
    """Return the intersection-over-union of two boxes.

    Returns the integer 0 when the boxes do not overlap (touching edges
    count as no overlap); otherwise the intersection area divided by the
    union area.
    """
    disjoint = (sample_bbox.xmin >= object_bbox.xmax or
                sample_bbox.xmax <= object_bbox.xmin or
                sample_bbox.ymin >= object_bbox.ymax or
                sample_bbox.ymax <= object_bbox.ymin)
    if disjoint:
        return 0

    left = max(sample_bbox.xmin, object_bbox.xmin)
    top = max(sample_bbox.ymin, object_bbox.ymin)
    right = min(sample_bbox.xmax, object_bbox.xmax)
    bottom = min(sample_bbox.ymax, object_bbox.ymax)
    shared = (right - left) * (bottom - top)

    union = bbox_area(sample_bbox) + bbox_area(object_bbox) - shared
    return shared / union


def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
    """Tell whether ``sample_bbox`` meets the sampler's overlap bounds.

    When both bounds are 0 the crop is accepted unconditionally. Otherwise
    the crop is accepted as soon as one label box (coordinates at indices
    1..4 of each label) has an overlap inside the enabled bounds; a bound
    equal to 0 is treated as disabled.
    """
    lower = sampler.min_jaccard_overlap
    upper = sampler.max_jaccard_overlap
    if lower == 0 and upper == 0:
        return True

    for label in bbox_labels:
        truth = bbox(label[1], label[2], label[3], label[4])
        overlap = jaccard_overlap(sample_bbox, truth)
        below = lower != 0 and overlap < lower
        above = upper != 0 and overlap > upper
        if not (below or above):
            return True
    return False


def generate_batch_samples(batch_sampler, bbox_labels):
    """Collect accepted random crops for every sampler in ``batch_sampler``.

    For each sampler, up to ``max_trial`` candidates are generated and each
    one passing ``satisfy_sample_constraint`` is kept, stopping early once
    ``max_sample`` candidates have been accepted for that sampler.

    Args:
        batch_sampler: iterable of ``sampler`` objects.
        bbox_labels: ground-truth labels forwarded to the constraint check.

    Returns:
        A flat list of the accepted ``bbox`` objects, in sampler order.
    """
    sampled_bbox = []
    for sampler in batch_sampler:
        found = 0
        for _ in range(sampler.max_trial):
            if found >= sampler.max_sample:
                break
            candidate = generate_sample(sampler)
            if satisfy_sample_constraint(sampler, candidate, bbox_labels):
                sampled_bbox.append(candidate)
                found += 1
    return sampled_bbox


def clip_bbox(src_bbox):
    """Clamp all four coordinates of ``src_bbox`` into [0.0, 1.0].

    The box is modified in place and the same object is returned.
    """
    for field in ('xmin', 'ymin', 'xmax', 'ymax'):
        clamped = max(min(getattr(src_bbox, field), 1.0), 0.0)
        setattr(src_bbox, field, clamped)
    return src_bbox


def meet_emit_constraint(src_bbox, sample_bbox):
    """Return True when the centre of ``src_bbox`` lies inside
    ``sample_bbox`` (edges included), False otherwise."""
    mid_x = (src_bbox.xmax + src_bbox.xmin) / 2
    mid_y = (src_bbox.ymax + src_bbox.ymin) / 2
    within_x = sample_bbox.xmin <= mid_x <= sample_bbox.xmax
    within_y = sample_bbox.ymin <= mid_y <= sample_bbox.ymax
    return bool(within_x and within_y)


def transform_labels(bbox_labels, sample_bbox):
    """Re-express labels in the coordinate frame of ``sample_bbox``.

    Labels whose box centre is outside ``sample_bbox`` are dropped. The
    remaining boxes are shifted and scaled relative to the sample box,
    clipped to [0, 1], and kept only if the clipped area is positive.

    Args:
        bbox_labels: list of label lists laid out as
            ``[label, xmin, ymin, xmax, ymax, *extra]``.
        sample_bbox: the crop/expand region (a ``bbox``).

    Returns:
        A new list of label lists with projected coordinates; the first
        element and everything from index 5 onward are copied through.
    """
    projected = bbox(0, 0, 0, 0)
    kept = []
    for label in bbox_labels:
        truth = bbox(label[1], label[2], label[3], label[4])
        if not meet_emit_constraint(truth, sample_bbox):
            continue

        span_x = sample_bbox.xmax - sample_bbox.xmin
        span_y = sample_bbox.ymax - sample_bbox.ymin
        projected.xmin = (truth.xmin - sample_bbox.xmin) / span_x
        projected.ymin = (truth.ymin - sample_bbox.ymin) / span_y
        projected.xmax = (truth.xmax - sample_bbox.xmin) / span_x
        projected.ymax = (truth.ymax - sample_bbox.ymin) / span_y
        projected = clip_bbox(projected)

        if bbox_area(projected) > 0:
            head = [
                label[0],
                float(projected.xmin),
                float(projected.ymin),
                float(projected.xmax),
                float(projected.ymax),
            ]
            # Trailing fields (e.g. difficult / iscrowd) pass through as-is.
            kept.append(head + label[5:])
    return kept


def crop_image(img, bbox_labels, sample_bbox, image_width, image_height):
    """Cut ``sample_bbox`` out of ``img`` and project the labels into it.

    ``sample_bbox`` is clipped to [0, 1] in place, converted to pixel
    indices with ``int()`` truncation, and used to slice ``img`` as
    ``img[rows, cols]``.

    Returns:
        ``(cropped_image, transformed_labels)``.
    """
    region = clip_bbox(sample_bbox)
    col_from = int(region.xmin * image_width)
    col_to = int(region.xmax * image_width)
    row_from = int(region.ymin * image_height)
    row_to = int(region.ymax * image_height)
    patch = img[row_from:row_to, col_from:col_to]
    return patch, transform_labels(bbox_labels, region)


def random_brightness(img, settings):
    """With probability ``settings._brightness_prob``, scale the brightness
    of ``img`` by a factor drawn from ``1 +/- settings._brightness_delta``;
    otherwise return ``img`` untouched."""
    if not (np.random.uniform(0, 1) < settings._brightness_prob):
        return img
    spread = settings._brightness_delta
    factor = np.random.uniform(-spread, spread) + 1
    return ImageEnhance.Brightness(img).enhance(factor)


def random_contrast(img, settings):
    """With probability ``settings._contrast_prob``, scale the contrast of
    ``img`` by a factor drawn from ``1 +/- settings._contrast_delta``;
    otherwise return ``img`` untouched."""
    if not (np.random.uniform(0, 1) < settings._contrast_prob):
        return img
    spread = settings._contrast_delta
    factor = np.random.uniform(-spread, spread) + 1
    return ImageEnhance.Contrast(img).enhance(factor)


def random_saturation(img, settings):
    """With probability ``settings._saturation_prob``, scale the colour
    saturation of ``img`` by a factor drawn from
    ``1 +/- settings._saturation_delta``; otherwise return ``img``
    untouched."""
    if not (np.random.uniform(0, 1) < settings._saturation_prob):
        return img
    spread = settings._saturation_delta
    factor = np.random.uniform(-spread, spread) + 1
    return ImageEnhance.Color(img).enhance(factor)


def random_hue(img, settings):
    """With probability ``settings._hue_prob``, shift the hue channel of
    ``img`` by an offset drawn from ``+/- settings._hue_delta``; otherwise
    return ``img`` untouched. The shifted image is converted back to RGB."""
    if not (np.random.uniform(0, 1) < settings._hue_prob):
        return img
    shift = np.random.uniform(-settings._hue_delta, settings._hue_delta)
    hsv = np.array(img.convert('HSV'))
    # NOTE(review): the float sum is written back into the HSV array, so
    # out-of-range values depend on NumPy's cast behaviour - confirm the
    # wrap-around is what is wanted.
    hsv[:, :, 0] = hsv[:, :, 0] + shift
    return Image.fromarray(hsv, mode='HSV').convert('RGB')


def distort_image(img, settings):
    """Apply the four random photometric distortions in one of two orders.

    Brightness always comes first. A uniform draw above 0.5 selects
    contrast -> saturation -> hue; otherwise saturation -> hue -> contrast.
    """
    contrast_early = np.random.uniform(0, 1) > 0.5
    if contrast_early:
        remaining = (random_contrast, random_saturation, random_hue)
    else:
        remaining = (random_saturation, random_hue, random_contrast)

    img = random_brightness(img, settings)
    for distortion in remaining:
        img = distortion(img, settings)
    return img


def expand_image(img, bbox_labels, img_width, img_height, settings):
    """Randomly paste ``img`` onto a larger canvas filled with the mean.

    The expansion happens with probability ``settings._expand_prob`` and
    only if ``settings._expand_max_ratio`` exceeds 1 by at least 0.01.
    The canvas size is the image size times a ratio drawn from
    ``[1, _expand_max_ratio]``, and the image is pasted at a random offset.

    Returns:
        ``(image, labels, width, height)``: the expanded image with labels
        projected onto the canvas, or the inputs unchanged when no
        expansion is applied.
    """
    untouched = (img, bbox_labels, img_width, img_height)
    if not (np.random.uniform(0, 1) < settings._expand_prob):
        return untouched
    if not (settings._expand_max_ratio - 1 >= 0.01):
        return untouched

    ratio = np.random.uniform(1, settings._expand_max_ratio)
    height = int(img_height * ratio)
    width = int(img_width * ratio)
    # Vertical offset is drawn before the horizontal one.
    h_off = math.floor(np.random.uniform(0, height - img_height))
    w_off = math.floor(np.random.uniform(0, width - img_width))

    # The canvas expressed in the original image's normalised coordinates.
    canvas_box = bbox(-w_off / img_width, -h_off / img_height,
                      (width - w_off) / img_width,
                      (height - h_off) / img_height)

    fill = np.uint8(
        np.ones((height, width, 3)) * np.squeeze(settings._img_mean))
    canvas = Image.fromarray(fill)
    canvas.paste(img, (int(w_off), int(h_off)))
    return canvas, transform_labels(bbox_labels, canvas_box), width, height

class Settings(object):
    """Dataset and augmentation settings shared by the data readers.

    Args:
        dataset: dataset name; when it contains ``'pascalvoc'`` the label
            list is loaded from ``label_file``.
        data_dir: root directory of the dataset.
        label_file: label-list file name, relative to ``data_dir``
            (only read for pascalvoc datasets).
        resize_h: height images are resized to.
        resize_w: width images are resized to.
        mean_value: three per-channel mean values.
        apply_distort: whether training applies photometric distortion.
        apply_expand: whether training applies random expansion.
        ap_version: name of the average-precision variant.
    """

    def __init__(self,
                 dataset=None,
                 data_dir=None,
                 label_file=None,
                 resize_h=300,
                 resize_w=300,
                 mean_value=(127.5, 127.5, 127.5),
                 apply_distort=True,
                 apply_expand=True,
                 ap_version='11point'):
        # Imported here because the notebook cell defining this class runs
        # before the cell that imports ``os`` at module level.
        import os

        self._dataset = dataset
        self._ap_version = ap_version
        self._data_dir = data_dir
        # Always defined so ``label_list`` works for non-VOC datasets too.
        self._label_list = []
        if dataset is not None and 'pascalvoc' in dataset:
            label_fpath = os.path.join(data_dir, label_file)
            with open(label_fpath) as label_fh:
                self._label_list = [line.strip() for line in label_fh]

        self._apply_distort = apply_distort
        self._apply_expand = apply_expand
        self._resize_height = resize_h
        self._resize_width = resize_w
        # Shape (3, 1, 1) so it broadcasts against a channel-first image.
        self._img_mean = np.array(mean_value)[:, np.newaxis, np.newaxis].astype(
            'float32')
        # Augmentation hyper-parameters read by the image utility functions.
        self._expand_prob = 0.5
        self._expand_max_ratio = 4
        self._hue_prob = 0.5
        self._hue_delta = 18
        self._contrast_prob = 0.5
        self._contrast_delta = 0.5
        self._saturation_prob = 0.5
        self._saturation_delta = 0.5
        self._brightness_prob = 0.5
        self._brightness_delta = 0.125

    @property
    def dataset(self):
        """Dataset name passed to the constructor."""
        return self._dataset

    @property
    def ap_version(self):
        """Average-precision variant name."""
        return self._ap_version

    @property
    def apply_expand(self):
        """Whether random expansion is enabled."""
        return self._apply_expand

    @property
    def apply_distort(self):
        """Whether photometric distortion is enabled."""
        return self._apply_distort

    @property
    def data_dir(self):
        """Root directory of the dataset."""
        return self._data_dir

    @data_dir.setter
    def data_dir(self, data_dir):
        self._data_dir = data_dir

    @property
    def label_list(self):
        """Label names loaded from the label file (empty if none loaded)."""
        return self._label_list

    @property
    def resize_h(self):
        """Target image height."""
        return self._resize_height

    @property
    def resize_w(self):
        """Target image width."""
        return self._resize_width

    @property
    def img_mean(self):
        """Per-channel mean as a float32 array of shape (3, 1, 1)."""
        return self._img_mean



## 2. 数据准备

### 2.1 下载数据
使用PaddlePaddle/models repo下提供的[数据下载脚本](https://github.com/PaddlePaddle/models/blob/release/1.4/PaddleCV/object_detection/data/coco/download.sh)下载数据。
或通过其它更便捷的方式准备数据

### 2.2 下载pretrained model

使用PaddlePaddle/models repo下提供的[下载脚本](https://github.com/PaddlePaddle/models/blob/release/1.4/PaddleCV/object_detection/pretrained/download_coco.sh)下载pretrained model.

### 2.3 设置数据变量

- args_image_shape：训练数据的shape, 格式为‘channel, height, width’
- args_mean_BGR: BGR三通道的均值
- args_data_dir： 数据存放的地址
- args_dataset：使用的数据集名称（例如 coco2014），该变量主要影响了下文中data reader的表现。
- args_pretrained_model：pretrained model存放的地址。

代码如下：

In [68]:
# User-editable arguments for this notebook.
args_image_shape = '3,300,300'
args_mean_BGR = '127.5,127.5,127.5'
args_data_dir = '/root/data/coco'
args_dataset = 'coco2014'
args_pretrained_model = 'pretrained/ssd_mobilenet_v1_coco/'
args_ap_version = '11point'

data_dir, dataset = args_data_dir, args_dataset
assert dataset in ['pascalvoc', 'coco2014', 'coco2017']

# Label list file, only read for pascalvoc.
label_file = 'label_list'

# Train / validation list files per dataset; anything not listed here
# falls back to the pascalvoc file names.
_annotation_files = {
    'coco2014': ('annotations/instances_train2014.json',
                 'annotations/instances_val2014.json'),
    'coco2017': ('annotations/instances_train2017.json',
                 'annotations/instances_val2017.json'),
}
train_file_list, val_file_list = _annotation_files.get(
    dataset, ('trainval.txt', 'test.txt'))

# Parse the comma-separated argument strings into numeric lists.
mean_BGR = [float(part) for part in args_mean_BGR.split(",")]
image_shape = [int(part) for part in args_image_shape.split(",")]

data_args = Settings(
    dataset=args_dataset,
    data_dir=data_dir,
    label_file=label_file,
    resize_h=image_shape[1],
    resize_w=image_shape[2],
    mean_value=mean_BGR,
    apply_distort=True,
    apply_expand=True,
    ap_version=args_ap_version)

### 2.4 准备data reader

在paddle中通过data reader为训练网络或测试网络提供数据，reader就是一个数据迭代产生器。

参考[reader.py](https://github.com/PaddlePaddle/models/blob/v1.4/PaddleCV/object_detection/reader.py)实现代码如下：

In [69]:
import xml.etree.ElementTree
import os
import time
import copy
import six
import math
import numpy as np
from PIL import Image
from PIL import ImageDraw
import paddle





def preprocess(img, bbox_labels, mode, settings):
    """Augment (train mode only), resize and normalise one image.

    In ``'train'`` mode the image is optionally distorted and expanded,
    then one crop is picked at random from the boxes produced by seven
    hard-coded samplers, and finally the image is mirrored horizontally
    with probability one half (label x-coordinates are flipped to match).
    In every mode the image is resized to ``settings.resize_w`` x
    ``settings.resize_h``, converted to channel-first layout with the
    channel order reversed, mean-subtracted and multiplied by 0.007843.

    Args:
        img: input image exposing ``size`` and ``resize``.
        bbox_labels: list of ``[label, xmin, ymin, xmax, ymax, extra]``.
        mode: ``'train'`` enables augmentation; any other value skips it.
        settings: object providing the resize size, mean and augmentation
            switches.

    Returns:
        ``(image_array, labels)`` where the array is float32.
    """
    img_width, img_height = img.size
    sampled_labels = bbox_labels
    training = mode == 'train'

    if training:
        if settings._apply_distort:
            img = distort_image(img, settings)
        if settings._apply_expand:
            img, bbox_labels, img_width, img_height = expand_image(
                img, bbox_labels, img_width, img_height, settings)

        # Hard-coded sampling rules: one whole-image sampler followed by
        # six samplers that differ only in their (min, max) overlap bounds.
        overlap_bounds = [(0.1, 0.0), (0.3, 0.0), (0.5, 0.0), (0.7, 0.0),
                          (0.9, 0.0), (0.0, 1.0)]
        batch_sampler = [sampler(1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0)]
        batch_sampler.extend(
            sampler(1, 50, 0.3, 1.0, 0.5, 2.0, low, high)
            for low, high in overlap_bounds)
        sampled_bbox = generate_batch_samples(batch_sampler, bbox_labels)

        img = np.array(img)
        if len(sampled_bbox) > 0:
            pick = int(np.random.uniform(0, len(sampled_bbox)))
            img, sampled_labels = crop_image(
                img, bbox_labels, sampled_bbox[pick], img_width, img_height)
        img = Image.fromarray(img)

    img = np.array(
        img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS))

    if training:
        mirror = int(np.random.uniform(0, 2))
        if mirror == 1:
            img = img[:, ::-1, :]
            # Flip and swap the x-coordinates of every label.
            for label in sampled_labels:
                label[1], label[3] = 1 - label[3], 1 - label[1]

    # HWC to CHW
    if len(img.shape) == 3:
        img = img.transpose((2, 0, 1))
    # RGB to BGR
    img = img[[2, 1, 0], :, :]
    img = img.astype('float32')
    img -= settings.img_mean
    img = img * 0.007843
    return img, sampled_labels


def coco(settings, coco_api, file_list, mode, batch_size, shuffle, data_dir):
    """Create a batched reader over COCO images.

    Args:
        settings: preprocessing settings; ``ap_version`` selects whether
            image metadata is appended to every sample.
        coco_api: object providing ``getAnnIds`` and ``loadAnns``.
        file_list: list of image records with ``'file_name'`` and ``'id'``.
            It is shuffled in place when ``mode == 'train'`` and
            ``shuffle`` is true.
        mode: ``'train'`` or ``'test'``.
        batch_size: number of samples per yielded batch.
        shuffle: whether to shuffle in train mode.
        data_dir: directory containing the image files.

    Returns:
        A zero-argument generator function. Each yielded batch is a list of
        ``(image, boxes, labels, iscrowd)`` tuples, with an extra
        ``[image_id, width, height]`` item when ``'cocoMAP'`` is in
        ``settings.ap_version``. Images that end up with no labels after
        preprocessing are skipped. In test mode the final partial batch is
        yielded as well; in train mode it is dropped.

    Raises:
        ValueError: (from the reader) if an image file does not exist.
    """

    def reader():
        if mode == 'train' and shuffle:
            np.random.shuffle(file_list)
        batch_out = []
        for image in file_list:
            image_path = os.path.join(data_dir, image['file_name'])
            if not os.path.exists(image_path):
                raise ValueError("%s is not exist, you should specify "
                                 "data path correctly." % image_path)
            im = Image.open(image_path)
            if im.mode == 'L':
                im = im.convert('RGB')
            im_width, im_height = im.size
            im_id = image['id']

            # layout: category_id | xmin | ymin | xmax | ymax | iscrowd
            bbox_labels = []
            for ann in coco_api.loadAnns(coco_api.getAnnIds(imgIds=im_id)):
                # COCO stores boxes as (x, y, width, height) in pixels.
                xmin, ymin, box_w, box_h = ann['bbox']
                xmax = xmin + box_w
                ymax = ymin + box_h
                bbox_labels.append([
                    # start from 1, leave 0 to background
                    float(ann['category_id']),
                    float(xmin) / im_width,
                    float(ymin) / im_height,
                    float(xmax) / im_width,
                    float(ymax) / im_height,
                    float(ann['iscrowd']),
                ])

            im, sample_labels = preprocess(im, bbox_labels, mode, settings)
            sample_labels = np.array(sample_labels)
            if len(sample_labels) == 0:
                continue
            im = im.astype('float32')
            boxes = sample_labels[:, 1:5]
            lbls = sample_labels[:, 0].astype('int32')
            iscrowd = sample_labels[:, -1].astype('int32')
            if 'cocoMAP' in settings.ap_version:
                batch_out.append((im, boxes, lbls, iscrowd,
                                  [im_id, im_width, im_height]))
            else:
                batch_out.append((im, boxes, lbls, iscrowd))

            if len(batch_out) == batch_size:
                yield batch_out
                batch_out = []

        # Flush the remainder in test mode, including a single leftover
        # sample, so that no evaluation image is silently skipped.
        if mode == 'test' and batch_out:
            yield batch_out

    return reader


def pascalvoc(settings, file_list, mode, batch_size, shuffle):
    """Create a batched reader over PASCAL VOC images.

    Args:
        settings: preprocessing settings providing ``data_dir`` and
            ``label_list``.
        file_list: list of ``"<image path> <annotation path>"`` strings,
            both relative to ``settings.data_dir``. It is shuffled in place
            when ``mode == 'train'`` and ``shuffle`` is true.
        mode: ``'train'`` or ``'test'``.
        batch_size: number of samples per yielded batch.
        shuffle: whether to shuffle in train mode.

    Returns:
        A zero-argument generator function. Each yielded batch is a list of
        ``(image, boxes, labels, difficults)`` tuples. Images that end up
        with no labels after preprocessing are skipped. In test mode the
        final partial batch is yielded as well; in train mode it is
        dropped.

    Raises:
        ValueError: (from the reader) if an image file does not exist.
    """

    def reader():
        if mode == 'train' and shuffle:
            np.random.shuffle(file_list)
        batch_out = []
        for image in file_list:
            image_path, label_path = image.split()
            image_path = os.path.join(settings.data_dir, image_path)
            label_path = os.path.join(settings.data_dir, label_path)
            if not os.path.exists(image_path):
                raise ValueError("%s is not exist, you should specify "
                                 "data path correctly." % image_path)
            im = Image.open(image_path)
            if im.mode == 'L':
                im = im.convert('RGB')
            im_width, im_height = im.size

            # layout: label | xmin | ymin | xmax | ymax | difficult
            bbox_labels = []
            root = xml.etree.ElementTree.parse(label_path).getroot()
            for obj in root.findall('object'):
                # The class id is the position of the name in label_list.
                class_id = float(
                    settings.label_list.index(obj.find('name').text))
                box_node = obj.find('bndbox')
                difficult = float(obj.find('difficult').text)
                bbox_labels.append([
                    class_id,
                    float(box_node.find('xmin').text) / im_width,
                    float(box_node.find('ymin').text) / im_height,
                    float(box_node.find('xmax').text) / im_width,
                    float(box_node.find('ymax').text) / im_height,
                    difficult,
                ])

            im, sample_labels = preprocess(im, bbox_labels, mode, settings)
            sample_labels = np.array(sample_labels)
            if len(sample_labels) == 0:
                continue
            im = im.astype('float32')
            boxes = sample_labels[:, 1:5]
            lbls = sample_labels[:, 0].astype('int32')
            difficults = sample_labels[:, -1].astype('int32')

            batch_out.append((im, boxes, lbls, difficults))
            if len(batch_out) == batch_size:
                yield batch_out
                batch_out = []

        # Flush the remainder in test mode, including a single leftover
        # sample, so that no evaluation image is silently skipped.
        if mode == 'test' and batch_out:
            yield batch_out

    return reader


def _split_for_workers(items, num_workers):
    """Split ``items`` into at most ``num_workers`` contiguous chunks."""
    workers = max(1, num_workers)
    # True division then ceil; the step is at least 1 so that fewer items
    # than workers no longer produces a zero step.
    chunk = max(1, int(math.ceil(len(items) / float(workers))))
    return [items[i:i + chunk] for i in range(0, len(items), chunk)]


def train_data_reader(settings,
          file_list,
          batch_size,
          shuffle=True,
          num_workers=8,
          enable_ce=False):
    """Build a multi-process training reader.

    The image list is split into at most ``num_workers`` chunks, one reader
    is created per chunk, and the readers are combined with
    ``paddle.reader.multiprocess_reader``.

    Args:
        settings: settings providing ``data_dir`` and ``dataset``.
        file_list: list/annotation file name relative to
            ``settings.data_dir``. For COCO datasets it must contain
            ``'2014'`` or ``'2017'`` to select the image sub-directory.
        batch_size: samples per batch.
        shuffle: whether each reader shuffles its chunk.
        num_workers: upper bound on the number of reader chunks.
        enable_ce: accepted for compatibility; not used by this function.

    Returns:
        The combined reader returned by
        ``paddle.reader.multiprocess_reader``.

    Raises:
        ValueError: if a COCO ``file_list`` names neither 2014 nor 2017.
    """
    file_path = os.path.join(settings.data_dir, file_list)
    if 'coco' in settings.dataset:
        # cocoapi
        from pycocotools.coco import COCO
        coco_api = COCO(file_path)
        images = coco_api.loadImgs(coco_api.getImgIds())

        if '2014' in file_list:
            sub_dir = "train2014"
        elif '2017' in file_list:
            sub_dir = "train2017"
        else:
            raise ValueError(
                "cannot infer the COCO year (2014/2017) from %s" % file_list)
        data_dir = os.path.join(settings.data_dir, sub_dir)
        readers = [
            coco(settings, coco_api, chunk, 'train', batch_size, shuffle,
                 data_dir)
            for chunk in _split_for_workers(images, num_workers)
        ]
    else:
        with open(file_path) as list_fh:
            images = [line.strip() for line in list_fh]
        readers = [
            pascalvoc(settings, chunk, 'train', batch_size, shuffle)
            for chunk in _split_for_workers(images, num_workers)
        ]

    return paddle.reader.multiprocess_reader(readers, False)


def test_data_reader(settings, file_list, batch_size):
    """Build the evaluation reader for the configured dataset.

    Args:
        settings: settings providing ``data_dir`` and ``dataset``.
        file_list: list/annotation file name relative to
            ``settings.data_dir``.
        batch_size: samples per batch.

    Returns:
        The reader created by ``coco`` (COCO datasets) or ``pascalvoc``
        (otherwise), in ``'test'`` mode without shuffling.

    Raises:
        ValueError: if the COCO year (2014/2017) cannot be inferred.
    """
    list_name = file_list
    file_list = os.path.join(settings.data_dir, file_list)
    if 'coco' in settings.dataset:
        from pycocotools.coco import COCO
        coco_api = COCO(file_list)
        images = coco_api.loadImgs(coco_api.getImgIds())

        # Look at the annotation file name first so that a year appearing
        # in data_dir cannot select the wrong image directory; the joined
        # path is kept as a fallback for backward compatibility.
        sub_dir = None
        for candidate in (list_name, file_list):
            if '2014' in candidate:
                sub_dir = "val2014"
                break
            if '2017' in candidate:
                sub_dir = "val2017"
                break
        if sub_dir is None:
            raise ValueError(
                "cannot infer the COCO year (2014/2017) from %s" % file_list)

        data_dir = os.path.join(settings.data_dir, sub_dir)
        return coco(settings, coco_api, images, 'test', batch_size, False,
                    data_dir)

    with open(file_list) as list_fh:
        image_list = [line.strip() for line in list_fh]
    return pascalvoc(settings, image_list, 'test', batch_size, False)


def infer(settings, image_path):
    """Create a reader that loads and normalises a single image.

    Args:
        settings: settings providing ``resize_w``, ``resize_h`` and
            ``img_mean``.
        image_path: path of the image to load.

    Returns:
        A zero-argument function that returns (not yields) the
        preprocessed image: resized, channel-first, channel order reversed,
        mean-subtracted and multiplied by 0.007843, as float32.

    Raises:
        ValueError: (from the reader) if ``image_path`` does not exist.
    """

    def reader():
        if not os.path.exists(image_path):
            raise ValueError("%s is not exist, you should specify "
                             "data path correctly." % image_path)
        img = Image.open(image_path)
        if img.mode == 'L':
            # Grayscale input: expand to three channels so the channel
            # reordering below works.
            img = img.convert('RGB')
        img = img.resize((settings.resize_w, settings.resize_h),
                         Image.ANTIALIAS)
        img = np.array(img)
        # HWC to CHW
        if len(img.shape) == 3:
            img = np.swapaxes(img, 1, 2)
            img = np.swapaxes(img, 1, 0)
        # RGB to BGR
        img = img[[2, 1, 0], :, :]
        img = img.astype('float32')
        img -= settings.img_mean
        img = img * 0.007843
        return img

    return reader

在以上实现的reader中，通过调用`train_data_reader`函数返回一个数据迭代器, 以下代码示例简单展示了该迭代器如何使用：

In [70]:
#train_reader = train_data_reader(data_args,
#                                train_file_list,
#                                2,
#                                shuffle=False,
#                                num_workers=1)

#for i, data in enumerate(train_reader()):
#    print("len: {}".format(len(data)))
#    if i > 1:
#        break

## 3. 定义网络

MobileNet-V1-SSD网络结构如下：
![MobileNet-V1-SSD](https://images2017.cnblogs.com/blog/310015/201710/310015-20171008120716074-895532111.png)

在文件[mobilenet_ssd.py](https://github.com/PaddlePaddle/models/blob/v1.4/PaddleCV/object_detection/mobilenet_ssd.py)中定义了MobileNet-V1-SSD网络结构。

In [71]:
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr


def conv_bn(input,
            filter_size,
            num_filters,
            stride,
            padding,
            channels=None,
            num_groups=1,
            act='relu',
            use_cudnn=True,
            name=None):
    """Bias-free 2-D convolution followed by batch normalisation.

    The activation ``act`` is applied by the batch-norm layer, not by the
    convolution. Convolution weights use MSRA initialisation and a
    learning-rate multiplier of 0.1. ``channels`` and ``name`` are accepted
    but not used.

    Returns:
        The output of ``fluid.layers.batch_norm``.
    """
    weight_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
    conv_kwargs = dict(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        groups=num_groups,
        act=None,
        use_cudnn=use_cudnn,
        param_attr=weight_attr,
        bias_attr=False)
    conv_out = fluid.layers.conv2d(**conv_kwargs)
    return fluid.layers.batch_norm(input=conv_out, act=act)


def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride,
                        scale):
    """Grouped 3x3 ``conv_bn`` followed by a 1x1 ``conv_bn``.

    Filter and group counts are multiplied by ``scale`` and truncated with
    ``int()``. The 3x3 stage uses ``stride`` and disables cuDNN; the 1x1
    stage always uses stride 1.
    """
    spatial_filters = int(num_filters1 * scale)
    spatial_groups = int(num_groups * scale)
    spatial = conv_bn(
        input=input,
        filter_size=3,
        num_filters=spatial_filters,
        stride=stride,
        padding=1,
        num_groups=spatial_groups,
        use_cudnn=False)

    mixing_filters = int(num_filters2 * scale)
    return conv_bn(
        input=spatial,
        filter_size=1,
        num_filters=mixing_filters,
        stride=1,
        padding=0)


def extra_block(input, num_filters1, num_filters2, num_groups, stride, scale):
    """1x1 ``conv_bn`` followed by a 3x3 ``conv_bn`` with stride 2.

    Filter and group counts are multiplied by ``scale`` and truncated with
    ``int()``. The ``stride`` argument is accepted but not used: the 3x3
    stage is hard-wired to stride 2.
    """
    groups = int(num_groups * scale)

    # 1x1 conv
    squeezed = conv_bn(
        input=input,
        filter_size=1,
        num_filters=int(num_filters1 * scale),
        stride=1,
        num_groups=groups,
        padding=0)

    # 3x3 conv
    return conv_bn(
        input=squeezed,
        filter_size=3,
        num_filters=int(num_filters2 * scale),
        stride=2,
        num_groups=groups,
        padding=1)


def mobile_net(num_classes, img, img_shape, scale=1.0):
    """Assemble the MobileNet-SSD graph and its multi-box head.

    Args:
        num_classes: class count passed to the multi-box head.
        img: input image variable.
        img_shape: image shape; ``img_shape[2]`` is used as the head's
            ``base_size``.
        scale: multiplier applied to the filter counts of every layer.

    Returns:
        ``(mbox_locs, mbox_confs, box, box_var)`` from
        ``fluid.layers.multi_box_head``.
    """
    # 300x300
    tmp = conv_bn(img, 3, int(32 * scale), 2, 1, 3)

    # (num_filters1, num_filters2, num_groups, stride) for the backbone
    # stages up to the first detection feature map, in creation order.
    backbone_plan = [
        (32, 64, 32, 1),  # 150x150
        (64, 128, 64, 2),
        (128, 128, 128, 1),  # 75x75
        (128, 256, 128, 2),
        (256, 256, 256, 1),  # 38x38
        (256, 512, 256, 2),
    ] + [(512, 512, 512, 1)] * 5  # 19x19
    for filters1, filters2, groups, stride in backbone_plan:
        tmp = depthwise_separable(tmp, filters1, filters2, groups, stride,
                                  scale)
    module11 = tmp
    tmp = depthwise_separable(tmp, 512, 1024, 512, 2, scale)

    # 10x10
    module13 = depthwise_separable(tmp, 1024, 1024, 1024, 1, scale)

    # Extra blocks, each fed by the previous feature map
    # (sizes per the original comments: 5x5, 3x3, 2x2, ...).
    feature_maps = [module11, module13]
    for filters1, filters2 in ((256, 512), (128, 256), (128, 256), (64, 128)):
        feature_maps.append(
            extra_block(feature_maps[-1], filters1, filters2, 1, 2, scale))

    mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head(
        inputs=feature_maps,
        image=img,
        num_classes=num_classes,
        min_ratio=20,
        max_ratio=90,
        min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
        max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
        aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]],
        base_size=img_shape[2],
        offset=0.5,
        flip=True)

    return mbox_locs, mbox_confs, box, box_var

## 4. 配置压缩任务

### 4.1 配置int8量化压缩策略

现阶段的量化训练主要针对卷积层（包括二维卷积和Depthwise卷积）以及全连接层进行量化。卷积层和全连接层在PaddlePaddle框架中对应算子包括conv2d、depthwise_conv2d和mul等。量化训练会对所有的conv2d、depthwise_conv2d和mul进行量化操作，且要求它们的输入中必须包括激活和参数两部分。
int8量化训练策略目前可配置的参数如下：

- **class:** 量化策略的类名称，目前仅支持QuantizationStrategy

- **start_epoch:** 在start_epoch开始之前，量化训练策略会往train_program和eval_program插入量化operators和反量化operators. 从start_epoch开始，进入量化训练阶段。

- **end_epoch:** 在end_epoch结束之后，会保存用户指定格式的模型。注意：end_epoch之后并不会停止量化训练，而是继续训练到compressor.epoch为止。

- **float_model_save_path:** 保存float数据格式模型的路径。模型weight的实际大小在int8可表示范围内，但是是以float格式存储的。如果设置为None, 则不存储float格式的模型。默认为None.

- **int8_model_save_path:** 保存int8数据格式模型的路径。如果设置为None, 则不存储int8格式的模型。默认为None.

- **mobile_model_save_path:** 保存兼容paddle-mobile框架的模型的路径。如果设置为None, 则不存储mobile格式的模型。默认为None.

- **weight_bits:** 量化weight的bit数，bias不会被量化。

- **activation_bits:** 量化activation的bit数。

- **weight_quantize_type:** 对于weight的量化方式，目前支持'abs_max'， 'channel_wise_abs_max'.

- **activation_quantize_type:** 对activation的量化方法，目前可选abs_max或range_abs_max。abs_max意为在训练的每个step和inference阶段动态的计算量化范围。range_abs_max意为在训练阶段计算出一个静态的范围，并将其用于inference阶段。

- **save_in_nodes:** variable名称列表。在保存量化后模型的时候，需要根据save_in_nodes对eval program网络进行前向遍历剪枝。默认为eval_feed_list内指定的variable的名称列表。

- **save_out_nodes:** variable名称列表。在保存量化后模型的时候，需要根据save_out_nodes对eval program网络进行回溯剪枝。默认为eval_fetch_list内指定的variable的名称列表。

其中`save_in_nodes`和`save_out_nodes`中的variable名称，可以在构建网络时，通过`print`打印。例如：


```

nmsed_out = fluid.layers.detection_output(
                        locs, confs, box, box_var, nms_threshold=0.45)
print(nmsed_out.name)

```

以下代码将配置文件写到当前工作路径下：

In [72]:
# YAML configuration for the slim Compressor, written verbatim to
# ./compress.yaml in the current working directory.
config="""
version: 1.0
strategies:
    quantization_strategy:
        class: 'QuantizationStrategy'
        start_epoch: 0
        end_epoch: 20
        float_model_save_path: './output/float'
        mobile_model_save_path: './output/mobile'
        int8_model_save_path: './output/int8'
        weight_bits: 8
        activation_bits: 8
        weight_quantize_type: 'abs_max'
        activation_quantize_type: 'abs_max'
        save_in_nodes: ['image']
        save_out_nodes: ['inferenceinferencedetection_output_0.tmp_0']
compressor:
    epoch: 2
    checkpoint_path: './checkpoints/'
    strategies:
        - quantization_strategy
"""

# The context manager closes the file even if the write fails.
with open("./compress.yaml", 'w') as f:
    f.write(config)

### 4.2 配置Compressor

压缩目标即我们要压缩的网络，我们需要为其准备以下内容：
#### 4.2.1 准备工作
##### step1: import

In [73]:
import os
import time
import numpy as np
import argparse
import functools
import shutil
import math
import multiprocessing

import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.slim import Compressor

##### step2: 设置训练相关参数

In [74]:
# Training arguments for this notebook run.
args_learning_rate = 0.001
args_batch_size = 32
args_epoc_num = 120
args_use_gpu = False
args_parallel = True

args_model_save_dir = 'model'

# Per-dataset defaults; the entry for the active dataset is overridden
# below with the notebook arguments.
train_parameters = {
    "pascalvoc": dict(
        train_images=16551,
        image_shape=[3, 300, 300],
        class_num=21,
        batch_size=64,
        lr=0.001,
        lr_epochs=[40, 60, 80, 100],
        lr_decay=[1, 0.5, 0.25, 0.1, 0.01],
        ap_version='11point'),
    "coco2014": dict(
        train_images=82783,
        image_shape=[3, 300, 300],
        class_num=91,
        batch_size=64,
        lr=0.001,
        lr_epochs=[12, 19],
        lr_decay=[1, 0.5, 0.25],
        # should use eval_coco_map.py to test model
        ap_version='integral'),
    "coco2017": dict(
        train_images=118287,
        image_shape=[3, 300, 300],
        class_num=91,
        batch_size=64,
        lr=0.001,
        lr_epochs=[12, 19],
        lr_decay=[1, 0.5, 0.25],
        # should use eval_coco_map.py to test model
        ap_version='integral'),
}

# Apply the notebook arguments to the selected dataset's entry.
train_parameters[dataset].update([
    ('image_shape', image_shape),
    ('batch_size', args_batch_size),
    ('lr', args_learning_rate),
    ('epoc_num', args_epoc_num),
    ('ap_version', args_ap_version),
])

##### step3: 定义优化器生成函数

In [75]:
def optimizer_setting(train_params):
    """Create the RMSProp optimizer used to train the network.

    Args:
        train_params: dict with the keys "batch_size", "train_images", "lr",
            "lr_epochs" and "lr_decay".

    Returns:
        A fluid RMSProp optimizer with L2 weight decay of 0.00005.
    """
    samples_per_batch = train_params["batch_size"]
    steps_per_epoch = train_params["train_images"] // samples_per_batch
    base_lr = train_params["lr"]

    # Piecewise schedule: step indices where the rate changes, and the rate
    # used in each interval.
    # NOTE(review): the schedule is computed but not used; the optimizer
    # below runs with a fixed learning rate of 0.1. To re-enable it, pass
    # fluid.layers.piecewise_decay(decay_steps, decay_rates) as learning_rate.
    decay_steps = [epoch * steps_per_epoch
                   for epoch in train_params["lr_epochs"]]
    decay_rates = [factor * base_lr for factor in train_params["lr_decay"]]

    weight_decay = fluid.regularizer.L2Decay(0.00005)
    return fluid.optimizer.RMSProp(
        learning_rate=0.1, regularization=weight_decay)

##### step4: 定义构建网络函数

>注意：Paddle1.5版本的压缩库才开始支持py_reader. 本步骤的示例代码用的是普通reader，而不是py_reader.
如果需要py_reader, 需要修改build_program函数。

In [76]:
def build_program(main_prog, startup_prog, train_params, is_train):
    """Build the MobileNet-SSD training or evaluation graph in ``main_prog``.

    Args:
        main_prog: fluid Program that receives the network ops.
        startup_prog: fluid Program passed to ``program_guard`` as the
            startup program.
        train_params: dict providing 'image_shape', 'class_num' and
            'ap_version'. When ``is_train`` is true it is also handed to
            ``optimizer_setting``.
        is_train: True to append the SSD loss and the optimizer ops; False
            to append detection output and the detection mAP metric.

    Returns:
        When ``is_train`` is true:
            ``((image, gt_box, gt_label, difficult), loss, optimizer)``.
        Otherwise:
            ``((image, gt_box, gt_label, difficult), map_var, nmsed_out,
            image)``. In this case ``gt_label`` and ``difficult`` are the
            cast/reshaped variables created below, not the original data
            layers.
    """
    image_shape = train_params['image_shape']
    class_num = train_params['class_num']
    ap_version = train_params['ap_version']
    outs = []
    with fluid.program_guard(main_prog, startup_prog):
        # py_reader alternative, kept for reference (plain data layers are
        # used instead):
        #py_reader = fluid.layers.py_reader(
        #    capacity=64,
        #    shapes=[[-1] + image_shape, [-1, 4], [-1, 1], [-1, 1]],
        #    lod_levels=[0, 1, 1, 1],
        #    dtypes=["float32", "float32", "int32", "int32"],
        #    use_double_buffer=True)
        with fluid.unique_name.guard():

            image = fluid.layers.data(name="image", shape=[-1]+image_shape, dtype="float32", lod_level=0)
            gt_box = fluid.layers.data(name="gt_box", shape=[-1, 4], dtype="float32", lod_level=1)
            gt_label = fluid.layers.data(name="gt_label", shape=[-1, 1], dtype="float32", lod_level=1)
            difficult = fluid.layers.data(name="difficult", shape=[-1, 1], dtype="float32", lod_level=1)
            # Debug Print ops on every input (summarize=10).
            fluid.layers.Print(image, message="image", summarize=10)
            fluid.layers.Print(gt_box, message="gt_box", summarize=10)
            fluid.layers.Print(gt_label, message="gt_label", summarize=10)
            fluid.layers.Print(difficult, message="difficult", summarize=10)
            #image, gt_box, gt_label, difficult = fluid.layers.read_file(py_reader)
            locs, confs, box, box_var = mobile_net(class_num, image, image_shape)
            # Ground-truth inputs must not receive gradients.
            gt_label.stop_gradient = True
            difficult.stop_gradient = True
            gt_box.stop_gradient = True
            if is_train:
                with fluid.unique_name.guard("train"):
                    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box,
                        box_var)
                    loss = fluid.layers.reduce_sum(loss)
                    # Use the parameters passed in, not the module-level
                    # train_parameters[dataset], so the function honours
                    # its own argument.
                    optimizer = optimizer_setting(train_params)
                    optimizer.minimize(loss)
                outs = ((image, gt_box, gt_label, difficult), loss, optimizer)
            else:
                with fluid.unique_name.guard("inference"):
                    nmsed_out = fluid.layers.detection_output(
                        locs, confs, box, box_var, nms_threshold=0.45)

                    print("nmsed_out: {}".format(nmsed_out))
                    gt_label = fluid.layers.cast(x=gt_label, dtype=gt_box.dtype)
                    # Explicit None check: the truth value of a graph
                    # Variable carries no meaning.
                    if difficult is not None:
                        difficult = fluid.layers.cast(x=difficult, dtype=gt_box.dtype)
                        gt_label = fluid.layers.reshape(gt_label, [-1, 1])
                        difficult = fluid.layers.reshape(difficult, [-1, 1])
                        label = fluid.layers.concat([gt_label, difficult, gt_box], axis=1)
                    else:
                        label = fluid.layers.concat([gt_label, gt_box], axis=1)
                    map_var = fluid.layers.detection.detection_map(
                            nmsed_out,
                            label,
                            class_num,
                            background_label=0,
                            overlap_threshold=0.5,
                            evaluate_difficult=False,
                            ap_version=ap_version)

                # nmsed_out and image are used to save the model for inference
                outs = ((image, gt_box, gt_label, difficult), map_var, nmsed_out, image)
    return outs

#### 4.2.2 配置Compressor

In [77]:
#def compress(args,
#          data_args,
#          train_params,
#          train_file_list,
#          val_file_list):

# Short aliases for the notebook-level settings used by the cells below.
use_gpu = args_use_gpu
parallel = args_parallel
pretrained_model = args_pretrained_model
model_save_dir = args_model_save_dir
# Passed to the training reader as its `shuffle` argument.
is_shuffle = True

##### 设置device信息，并根据device信息设置batch size

In [78]:
# Number of devices the batch is divided over.
if use_gpu:
    devices_num = fluid.core.get_cuda_device_count()
else:
    devices_num = int(
        os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    # NOTE(review): the CPU_NUM-based value above is overwritten at once, so
    # a CPU run always uses one device. Confirm this is intended.
    devices_num = 1

batch_size = train_parameters[dataset]['batch_size']
batch_size_per_device = batch_size // devices_num

##### 声明并构造train_program和test_program

- **train_program:** 用于在压缩过程中迭代训练模型，该program必须包含loss。一般该program不要有backward op和weights update op，否则不能使用蒸馏策略。

- **test_program:** 用于在压缩过程中评估模型的精度，一般会包含accuracy、IoU等评估指标的计算layer。

In [79]:

# One startup program is shared by the training and evaluation programs.
startup_prog = fluid.Program()
train_prog = fluid.Program()
test_prog = fluid.Program()

epoc_num = train_parameters[dataset]['epoc_num']

# Training graph: data layers, loss and optimizer.
train_inputs, loss, optimizer = build_program(
    train_prog, startup_prog, train_parameters[dataset], is_train=True)

# Evaluation graph: only the inputs and the mAP variable are kept here.
test_inputs, map_var, _, _ = build_program(
    test_prog, startup_prog, train_parameters[dataset], is_train=False)

# Replace the evaluation program with its for_test clone.
test_prog = test_prog.clone(for_test=True)

nmsed_out: name: "inferenceinferencedetection_output_0.tmp_0"
type {
  type: LOD_TENSOR
  lod_tensor {
    tensor {
      data_type: FP32
      dims: 1917
      dims: 6
    }
  }
}
persistable: false



#### 初始化网络并加载预训练模型

In [80]:
# Choose the execution device and run the startup program on it.
if use_gpu:
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_prog)

if pretrained_model:
    def if_exist(var):
        """Return True when a file named after `var` exists in pretrained_model."""
        candidate = os.path.join(pretrained_model, var.name)
        return os.path.exists(candidate)

    # Load only the variables of train_prog that pass the predicate above.
    fluid.io.load_vars(
        exe, pretrained_model, main_program=train_prog, predicate=if_exist)

##### 构造data reader

- train_reader: 用于给train_program的执行提供数据

- eval_reader: 用于给eval_program的执行提供数据

In [105]:
num_workers = 8

# Training reader: uses the per-device batch size and the shuffle setting.
train_reader = train_data_reader(
    data_args,
    train_file_list,
    batch_size_per_device,
    shuffle=is_shuffle,
    num_workers=num_workers)

# Evaluation reader: uses the full batch size.
test_reader = test_data_reader(
    data_args,
    val_file_list,
    batch_size)
    

##### 指定训练网络和测试网络的input和output

feed list和fetch list是两个有序的字典, 其中，feed_list中的key为自定义的有一定含义的字符串，value是Variable的名称, feed_list中的顺序需要和DataReader提供的数据的顺序对应。

对于train_program和test_program都需要有与其对应的feed_list和fetch_list。

>注意： 在train_program对应的fetch_list中，loss variable(loss layer的输出)对应的key一定要是'loss'

In [85]:
# Feed lists are (key, variable name) pairs. The names are the literal names
# given to the data layers in build_program.
image, gt_box, gt_label, difficult = train_inputs
train_feed_list = [
    ("image", "image"),
    ("gt_box", "gt_box"),
    ("gt_label", "gt_label"),
    ("difficult", "difficult"),
]
# Fetch the training loss under the key "loss".
train_fetch_list = [("loss", loss.name)]

image, gt_box, gt_label, difficult = test_inputs
val_feed_list = [
    ("image", "image"),
    ("gt_box", "gt_box"),
    ("gt_label", "gt_label"),
    ("difficult", "difficult"),
]
# Fetch the detection mAP under the key "map".
val_fetch_list = [("map", map_var.name)]

### 4.3 构造并执行Compressor

In [104]:
# Create the Compressor from the training and evaluation programs, their
# readers and their feed/fetch lists. train_optimizer is None; train_prog
# already had optimizer.minimize(loss) applied in build_program.
com_pass = Compressor(
    place,
    fluid.global_scope(),
    train_prog,
    train_reader=train_reader,
    train_feed_list=train_feed_list,
    train_fetch_list=train_fetch_list,
    eval_program=test_prog,
    eval_reader=test_reader,
    eval_feed_list=val_feed_list,
    eval_fetch_list=val_fetch_list,
    train_optimizer=None,
)

# Load the strategy configuration from compress.yaml, then run.
com_pass.config('./compress.yaml')
eval_graph = com_pass.run()