infer.py

import os
import time
import numpy as np
import argparse
import functools
from PIL import Image
from PIL import ImageDraw

import paddle
import paddle.fluid as fluid
import reader
from pyramidbox import PyramidBox
from utility import add_arguments, print_arguments
# Command-line interface. Options are registered on `parser` through the
# project helper `add_arguments` (imported from utility).
parser = argparse.ArgumentParser(description=__doc__)
# Pre-bind the parser so each option below can be declared in one short call.
# NOTE(review): the positional arguments look like (name, type, default,
# help text) -- confirm against utility.add_arguments.
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('use_gpu',          bool,  True,      "Whether use GPU.")
add_arg('use_pyramidbox',   bool,  False, "Whether use PyramidBox model.")
add_arg('confs_threshold',  float, 0.25,    "Confidence threshold to draw bbox.")
add_arg('image_path',       str,   '',        "The data root path.")
add_arg('model_dir',        str,   '',     "The model path.")
# yapf: enable


def draw_bounding_box_on_image(image_path, nms_out, confs_threshold):
    """Draw the confident detections on an image and save the result.

    Args:
        image_path: '/'-separated path of the source image. Its last two
            components (class directory and file name) select the output
            location ``./infer_results/<class>/<name>``.
        nms_out: Iterable of rows ``(xmin, ymin, xmax, ymax, score)``.
        confs_threshold: Rows whose score is below this value are skipped.

    The output directory is not created here; it must already exist.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for xmin, ymin, xmax, ymax, score in nms_out:
        if score < confs_threshold:
            continue
        # Closed polyline: top-left -> bottom-left -> bottom-right ->
        # top-right -> back to top-left.
        draw.line(
            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
             (xmin, ymin)],
            width=4,
            fill='red')
    image_name = image_path.split('/')[-1]
    image_class = image_path.split('/')[-2]
    print("image with bbox drawed saved as {}".format(image_name))

    out_path = './infer_results/' + image_class + '/' + image_name
    if not isinstance(out_path, str):
        # Python 2 `unicode` path: encode once, after joining. Native `str`
        # values are passed through untouched, which avoids the implicit
        # ASCII decode of byte strings on Python 2 and the bytes + str
        # TypeError on Python 3.
        out_path = out_path.encode('utf-8')
    image.save(out_path)


def write_to_txt(image_path, f, nms_out):
    """Write the detections of one image to an open text file.

    Output layout, one item per line:
        <class directory>/<file name>
        <number of detections>
        <xmin> <ymin> <width> <height> <score>   (one line per detection)

    Width and height are computed as ``xmax - xmin + 1`` and
    ``ymax - ymin + 1``.

    Args:
        image_path: '/'-separated image path; only its last two components
            are written.
        f: Writable text file object.
        nms_out: 2-D array whose rows are ``(xmin, ymin, xmax, ymax, score)``.
    """
    image_name = image_path.split('/')[-1]
    image_class = image_path.split('/')[-2]

    rel_path = image_class + '/' + image_name
    if not isinstance(rel_path, str):
        # Python 2 `unicode` path: encode once, after joining. Native `str`
        # values are passed through untouched, which avoids the implicit
        # ASCII decode of byte strings on Python 2 and the bytes + str
        # TypeError on Python 3.
        rel_path = rel_path.encode('utf-8')

    f.write('{:s}\n'.format(rel_path))
    f.write('{:d}\n'.format(nms_out.shape[0]))
    for xmin, ymin, xmax, ymax, score in nms_out:
        f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(
            xmin, ymin, xmax - xmin + 1, ymax - ymin + 1, score))
    # Strips the last four characters (e.g. a ".jpg" suffix) for the log line.
    print("image infer result saved {}".format(image_name[:-4]))


def get_round(x, loc):
    """Truncate (not round) ``x`` to ``loc`` digits after the decimal point.

    The truncation works on ``str(x)``, so it cuts the printed decimal
    expansion and never rounds up: ``get_round(1.239, 2) == 1.23``.

    Args:
        x: Number to truncate.
        loc: Number of fractional digits to keep.

    Returns:
        The truncated value as a float, or ``x`` unchanged when its string
        form has no plain decimal fraction (integers, exponent notation,
        inf/nan) or already has at most ``loc`` fractional digits.
    """
    whole, dot, frac = str(x).partition('.')
    # Previously a missing '.' fell through and returned None, and exponent
    # forms such as '1.5e-05' produced an unparsable string.
    if not dot or not frac.isdigit() or len(frac) <= loc:
        return x
    return float(whole + '.' + frac[:loc])


def bbox_vote(det):
    """Merge overlapping detections by score-weighted box voting.

    Detections are processed in descending score order. For the current top
    box, every remaining box with IoU >= 0.3 is grouped with it and removed
    from the pool. A group of two or more boxes becomes one box whose
    coordinates are the score-weighted mean of the group and whose score is
    the group's maximum. A box that groups with nothing else is discarded,
    except when it is the very last box left in the pool, in which case it is
    kept as is.

    Args:
        det: Array of shape (N, 5) with rows
            ``(xmin, ymin, xmax, ymax, score)``. It is not modified.

    Returns:
        Array of shape (M, 5) with at most 750 rows. For empty input a single
        low-score placeholder row ``[10, 10, 20, 20, 0.002]`` is returned.
    """
    order = det[:, 4].ravel().argsort()[::-1]
    det = det[order, :]
    if det.shape[0] == 0:
        # Placeholder so callers always receive at least one row.
        return np.array([[10, 10, 20, 20, 0.002]])

    voted = []
    while det.shape[0] > 0:
        # IoU of the top-scoring box against every remaining box.
        area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
        xx1 = np.maximum(det[0, 0], det[:, 0])
        yy1 = np.maximum(det[0, 1], det[:, 1])
        xx2 = np.minimum(det[0, 2], det[:, 2])
        yy2 = np.minimum(det[0, 3], det[:, 3])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        o = inter / (area[0] + area[:] - inter)

        # Boxes to merge with the top box; they leave the pool.
        merge_index = np.where(o >= 0.3)[0]
        if merge_index.shape[0] == 0:
            # A degenerate top box (e.g. xmax < xmin) does not even overlap
            # itself. Always consume it, otherwise the pool never shrinks
            # and the loop would not terminate.
            merge_index = np.array([0])
        det_accu = det[merge_index, :]
        det = np.delete(det, merge_index, 0)

        if merge_index.shape[0] <= 1:
            # Unsupported single box: keep it only if it is the last one.
            if det.shape[0] == 0:
                voted.append(det_accu)
            continue

        # Score-weighted average of the coordinates; score is the group max.
        det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
        max_score = np.max(det_accu[:, 4])
        det_accu_sum = np.zeros((1, 5))
        det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
                                      axis=0) / np.sum(det_accu[:, -1:])
        det_accu_sum[:, 4] = max_score
        voted.append(det_accu_sum)

    # The last iteration always appends, so `voted` is never empty here.
    dets = np.vstack(voted)
    return dets[0:750, :]


def image_preprocess(image):
    """Convert an image into the normalized batch-of-one network input.

    Steps: HWC -> CHW, reverse the first three channels (RGB -> BGR),
    subtract the per-channel means (104, 117, 123), multiply by 0.007843 and
    add a leading batch axis.

    Args:
        image: Anything ``np.array`` accepts (e.g. a PIL image). A 2-D
            (single-channel) image is replicated to three channels; the
            previous code raised IndexError on such input.

    Returns:
        float32 array of shape (1, 3, H, W).
    """
    img = np.array(image)
    if img.ndim == 2:
        # Grayscale: replicate the single plane into three channels.
        img = np.repeat(img[:, :, np.newaxis], 3, axis=2)
    # HWC to CHW
    if img.ndim == 3:
        img = img.transpose(2, 0, 1)
    # RGB to BGR (any extra channel such as alpha is dropped here)
    img = img[[2, 1, 0], :, :].astype('float32')
    mean = np.array([104., 117., 123.], dtype='float32')
    img -= mean[:, np.newaxis, np.newaxis]
    img = img * 0.007843
    return np.array([img])


def detect_face(image, shrink):
    """Run the detector once on `image` rescaled by `shrink`.

    Reads the module-level `args` (use_gpu, use_pyramidbox, model_dir), so
    it only works after `args` has been bound by the script entry point.

    Every call builds a fresh Scope and Program pair, constructs the network
    for the (possibly resized) input shape and reloads the persistable
    variables from `args.model_dir` before running inference.

    Args:
        image: Image object exposing `.size` as (width, height) and
            `.resize(...)`.
        shrink: Scale factor applied to both image sides before detection;
            a value of 1 skips resizing.

    Returns:
        Array with columns (xmin, ymin, xmax, ymax, score), scaled back by
        1 / shrink. Rows with a negative score are dropped.
    """
    # [channels, height, width] of the input image.
    image_shape = [3, image.size[1], image.size[0]]
    num_classes = 2
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    if shrink != 1:
        # PIL's resize takes (width, height); image_shape is [C, H, W].
        image = image.resize((int(image_shape[2] * shrink),
                              int(image_shape[1] * shrink)), Image.ANTIALIAS)
        # From here on image_shape describes the resized image.
        image_shape = [
            image_shape[0], int(image_shape[1] * shrink),
            int(image_shape[2] * shrink)
        ]
    print "image_shape:", image_shape
    img = image_preprocess(image)

    # Isolated scope and programs for this call.
    scope = fluid.core.Scope()
    main_program = fluid.Program()
    startup_program = fluid.Program()

    with fluid.scope_guard(scope):
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program, startup_program):
                fetches = []
                network = PyramidBox(
                    image_shape,
                    num_classes,
                    sub_network=args.use_pyramidbox,
                    is_infer=True)
                infer_program, nmsed_out = network.infer(main_program)
                fetches = [nmsed_out]
                fluid.io.load_persistables(
                    exe, args.model_dir, main_program=main_program)

                detection, = exe.run(infer_program,
                                     feed={'image': img},
                                     fetch_list=fetches,
                                     return_numpy=False)
                detection = np.array(detection)
    # Column 1 of `detection` is the score and columns 2..5 are
    # xmin, ymin, xmax, ymax.
    # NOTE(review): the coordinates are presumably normalized to [0, 1];
    # multiplying by the resized width/height and dividing by `shrink` then
    # maps them back to the original image scale -- confirm against the
    # network's output definition.
    det_conf = detection[:, 1]
    det_xmin = image_shape[2] * detection[:, 2] / shrink
    det_ymin = image_shape[1] * detection[:, 3] / shrink
    det_xmax = image_shape[2] * detection[:, 4] / shrink
    det_ymax = image_shape[1] * detection[:, 5] / shrink

    # Output layout: xmin, ymin, xmax, ymax, score
    det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf))
    # Keep only rows with a non-negative score.
    keep_index = np.where(det[:, 4] >= 0)[0]
    det = det[keep_index, :]
    return det


def flip_test(image, shrink):
    """Detect faces on the horizontally mirrored image.

    The boxes found on the mirrored image are mapped back into the
    coordinate frame of the original image before being returned.

    Args:
        image: Image object with `.size` as (width, height) and
            `.transpose(...)`.
        shrink: Scale factor forwarded to `detect_face`.

    Returns:
        Array shaped like the `detect_face` result, with columns
        (xmin, ymin, xmax, ymax, score).
    """
    mirrored = image.transpose(Image.FLIP_LEFT_RIGHT)
    det_f = detect_face(mirrored, shrink)

    # image.size: [width, height]
    width = image.size[0]
    det_t = np.zeros(det_f.shape)
    # Mirroring swaps the roles of xmin and xmax: new xmin comes from the old
    # xmax and vice versa.
    det_t[:, [0, 2]] = width - det_f[:, [2, 0]]
    # ymin, ymax and score are unaffected by a horizontal flip.
    det_t[:, [1, 3, 4]] = det_f[:, [1, 3, 4]]
    return det_t


def multi_scale_test(image, max_shrink):
    """Detect faces at a reduced scale and at one or more enlarged scales.

    Args:
        image: Image passed through to `detect_face`.
        max_shrink: Largest scale factor that may be used for this image.

    Returns:
        Tuple ``(det_s, det_b)``:
        det_s -- detections from the reduced scale whose longer side
            exceeds 30 pixels.
        det_b -- detections from the enlarged scale(s). When the final scale
            is above 1 only boxes whose shorter side is below 100 pixels are
            kept; otherwise only boxes whose longer side exceeds 30 pixels.
    """
    # Reduced scale: only large faces are kept from this pass.
    st = 0.5 if max_shrink >= 0.75 else 0.5 * max_shrink
    det_s = detect_face(image, st)
    longer_side = np.maximum(det_s[:, 2] - det_s[:, 0] + 1,
                             det_s[:, 3] - det_s[:, 1] + 1)
    det_s = det_s[np.where(longer_side > 30)[0], :]

    # First enlarged scale.
    if max_shrink > 1:
        bt = min(2, max_shrink)
    else:
        bt = (st + max_shrink) / 2
    enlarged = [detect_face(image, bt)]

    # Keep doubling the scale while it stays below max_shrink, then finish
    # with max_shrink itself.
    if max_shrink > 2:
        bt *= 2
        while bt < max_shrink:
            enlarged.append(detect_face(image, bt))
            bt *= 2
        enlarged.append(detect_face(image, max_shrink))
    det_b = enlarged[0] if len(enlarged) == 1 else np.vstack(enlarged)

    box_w = det_b[:, 2] - det_b[:, 0] + 1
    box_h = det_b[:, 3] - det_b[:, 1] + 1
    if bt > 1:
        # Enlarged pass: trust it for small faces only.
        selected = np.where(np.minimum(box_w, box_h) < 100)[0]
    else:
        selected = np.where(np.maximum(box_w, box_h) > 30)[0]
    det_b = det_b[selected, :]
    return det_s, det_b


def get_im_shrink(image_shape):
    """Derive the scale factors used for testing one image.

    Two upper bounds on the scale are computed from the pixel count
    (height * width), the smaller one is truncated to two decimals with
    `get_round` and reduced by 0.3, and then a further margin that grows
    with the value is subtracted.

    Args:
        image_shape: Sequence ``[channels, height, width]``.

    Returns:
        Tuple ``(shrink, max_shrink)`` where ``shrink`` is ``max_shrink``
        when that is below 1 and 1 otherwise.
    """
    pixels = image_shape[1] * image_shape[2]
    bound_v1 = (0x7fffffff / 577.0 / pixels)**0.5
    bound_v2 = ((678 * 1024 * 2.0 * 2.0) / pixels)**0.5
    max_shrink = get_round(min(bound_v1, bound_v2), 2) - 0.3

    # (lower bound inclusive, upper bound exclusive, margin to subtract)
    margins = (
        (1.5, 2, 0.1),
        (2, 3, 0.2),
        (3, 4, 0.3),
        (4, 5, 0.4),
        (5, float('inf'), 0.5),
    )
    for lower, upper, margin in margins:
        if lower <= max_shrink < upper:
            max_shrink = max_shrink - margin
            break

    # Single-argument print calls emit the same text as the two-item print
    # statements they replace (items separated by one space).
    print(' '.join(('max_shrink = ', str(max_shrink))))
    if max_shrink < 1:
        shrink = max_shrink
    else:
        shrink = 1
    print(' '.join(("shrink = ", str(shrink))))

    return shrink, max_shrink


def infer(args, batch_size, data_args):
    """Run multi-scale face detection over the test set and save the results.

    For every image the detections from the base scale, the mirrored image
    and the multi-scale passes are stacked, merged with `bbox_vote` and
    written to ``./infer_results/<class>/<name without last 4 chars>.txt``.

    Reads the module-level name `file_list`, which is bound by the script
    entry point; it is not a parameter of this function.

    Args:
        args: Parsed command-line arguments; `model_dir` is checked here.
        batch_size: Number of samples per batch requested from the reader.
            Every sample of every batch is processed.
        data_args: Reader settings forwarded to `reader.test`.

    Raises:
        ValueError: If `args.model_dir` does not exist.
    """
    if not os.path.exists(args.model_dir):
        raise ValueError("The model path [%s] does not exist." %
                         (args.model_dir))

    infer_reader = paddle.batch(
        reader.test(data_args, file_list), batch_size=batch_size)

    for batch in infer_reader():
        # Each sample is indexed as (image, image_path). Previously only the
        # first sample of a batch was used, silently skipping the rest when
        # batch_size > 1.
        for sample in batch:
            image = sample[0]
            image_path = sample[1]

            # image.size: [width, height]
            image_shape = [3, image.size[1], image.size[0]]

            shrink, max_shrink = get_im_shrink(image_shape)

            det0 = detect_face(image, shrink)
            det1 = flip_test(image, shrink)
            det2, det3 = multi_scale_test(image, max_shrink)
            det = np.vstack((det0, det1, det2, det3))
            dets = bbox_vote(det)

            image_name = image_path.split('/')[-1]
            image_class = image_path.split('/')[-2]

            out_dir = './infer_results/' + image_class
            # Drop the last four characters (e.g. ".jpg") before adding .txt.
            out_file = out_dir + '/' + image_name[:-4] + '.txt'
            if not isinstance(out_file, str):
                # Python 2 `unicode` path: encode once, after joining. Native
                # `str` values pass through untouched, which avoids the
                # implicit ASCII decode of byte strings on Python 2 and the
                # bytes + str TypeError on Python 3.
                out_dir = out_dir.encode('utf-8')
                out_file = out_file.encode('utf-8')

            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

            # The context manager closes the file after each image; the
            # handle used to be left open.
            with open(out_file, 'w') as f:
                write_to_txt(image_path, f, dets)
            # draw_bounding_box_on_image(image_path, dets, args.confs_threshold)
    print("Done")


# Script entry point. The names bound here are module-level globals:
# `detect_face` reads `args` and `infer` reads `file_list`, so those
# functions only work after this block has run.
if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)

    # Hard-coded dataset location and list file handed to the reader.
    data_dir = 'data/WIDERFACE/WIDER_val/images/'
    file_list = 'label/val_gt_widerface.res'

    # Reader settings; the mean values match the ones subtracted in
    # image_preprocess.
    data_args = reader.Settings(
        data_dir=data_dir,
        mean_value=[104., 117., 123],
        apply_distort=False,
        apply_expand=False,
        ap_version='11point')
    # batch_size=1: images are handled one at a time.
    infer(args, batch_size=1, data_args=data_args)