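"""Standalone inference benchmark for an exported PaddleDetection model.

The input image is preprocessed according to the exported config, then run
either through the Python executor (`use_python_inference`) or through the
AnalysisConfig-based predictor, optionally with the TensorRT subgraph engine.
Timing is averaged over 100 runs after a short warmup; with --visualize the
detected boxes are drawn and saved to --output_dir.
"""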
import os
import time

import numpy as np
from PIL import Image

import paddle.fluid as fluid

import argparse
from ppdet.utils.visualizer import visualize_results, draw_bbox
from ppdet.utils.eval_utils import eval_results
import ppdet.utils.voc_eval as voc_eval
import ppdet.utils.coco_eval as coco_eval
import cv2
import yaml
import copy

import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)

eval_clses = {'COCO': coco_eval, 'VOC': voc_eval}

precision_map = {
    'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
    'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
    'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}


def create_config(model_path, mode='fluid', batch_size=1, min_subgraph_size=3):
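    """Build a fluid AnalysisConfig for the exported model (__model__ / __params__).

    GPU is enabled with a 100 MB initial memory pool on device 0. For the
    trt_* modes the TensorRT subgraph engine is enabled with the requested
    precision; 'fluid' runs plain FP32 inference.
    """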
    model_file = os.path.join(model_path, '__model__')
    params_file = os.path.join(model_path, '__params__')
    config = fluid.core.AnalysisConfig(model_file, params_file)
    config.enable_use_gpu(100, 0)
    logger.info('min_subgraph_size = %d.' % (min_subgraph_size))

    if mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=1 << 30,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[mode],
            use_static=False,
            use_calib_mode=mode == 'trt_int8')
        logger.info('Run inference by {}.'.format(mode))
    elif mode == 'fluid':
        logger.info('Run inference by Fluid FP32.')
    else:
        logger.fatal(
            'Unsupported mode: only trt_int8, trt_fp32, trt_fp16 and fluid are supported.')
    return config


def offset_to_lengths(lod):
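    """Convert a LoD offset list to per-sample lengths, e.g. [[0, 3, 5]] -> [[3, 2]]."""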
    offset = lod[0]
    lengths = [offset[i + 1] - offset[i] for i in range(len(offset) - 1)]
    return [lengths]


def DecodeImage(im_path):
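    """Read an image file and decode it into an RGB HWC uint8 array."""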
    with open(im_path, 'rb') as f:
        im = f.read()
    data = np.frombuffer(im, dtype='uint8')
    im = cv2.imdecode(data, 1)  # decoded as BGR; the model expects RGB
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    return im


def get_extra_info(im, arch, shape, scale):
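    """Build the architecture-specific extra inputs fed alongside the image:
    im_size for YOLO, im_shape for SSD, im_info for RetinaNet, and
    im_info plus im_shape for RCNN models.
    """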
    info = []
    input_shape = []
    im_shape = []
    logger.info('The architecture is {}'.format(arch))
    if 'YOLO' in arch:
        im_size = np.array([shape[:2]]).astype('int32')
        logger.info('Extra info: im_size')
        info.append(im_size)
    elif 'SSD' in arch:
        im_shape = np.array([shape[:2]]).astype('int32')
        logger.info('Extra info: im_shape')
        info.append([im_shape])
    elif 'RetinaNet' in arch:
        input_shape.extend(im.shape[2:])
        im_info = np.array([input_shape + [scale]]).astype('float32')
        logger.info('Extra info: im_info')
        info.append(im_info)
    elif 'RCNN' in arch:
        input_shape.extend(im.shape[2:])
        im_shape.extend(shape[:2])
        im_info = np.array([input_shape + [scale]]).astype('float32')
        im_shape = np.array([im_shape + [1.]]).astype('float32')
        logger.info('Extra info: im_info, im_shape')
        info.append(im_info)
        info.append(im_shape)
    else:
        logger.error(
            "Unsupported arch: {}, expected YOLO, SSD, RetinaNet or RCNN".format(
                arch))
    return info


class Resize(object):
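    """Resize the image to the target size.

    For RCNN / RetinaNet models with max_size > 0, the shorter side is scaled
    to target_size (capping the longer side at max_size) and the result is
    zero-padded to a max_size x max_size canvas; otherwise both sides are
    resized to target_size. Returns the image and the horizontal scale factor.
    """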
    def __init__(self, target_size, max_size=0, interp=cv2.INTER_LINEAR):
        super(Resize, self).__init__()
        self.target_size = target_size
        self.max_size = max_size
        self.interp = interp

    def __call__(self, im, arch):
        origin_shape = im.shape[:2]
        im_c = im.shape[2]
        scale_set = {'RCNN', 'RetinaNet'}
        if self.max_size != 0 and arch in scale_set:
            im_size_min = np.min(origin_shape[0:2])
            im_size_max = np.max(origin_shape[0:2])
            im_scale = float(self.target_size) / float(im_size_min)
            if np.round(im_scale * im_size_max) > self.max_size:
                im_scale = float(self.max_size) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
            resize_w = int(im_scale_x * float(origin_shape[1]))
            resize_h = int(im_scale_y * float(origin_shape[0]))
        else:
            im_scale_x = float(self.target_size) / float(origin_shape[1])
            im_scale_y = float(self.target_size) / float(origin_shape[0])
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        # padding im
        if self.max_size != 0 and arch in scale_set:
            padding_im = np.zeros(
                (self.max_size, self.max_size, im_c), dtype=np.float32)
            im_h, im_w = im.shape[:2]
            padding_im[:im_h, :im_w, :] = im
            im = padding_im
        return im, im_scale_x


class Normalize(object):
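    """Normalize the image with mean / std, optionally scaling pixels to [0, 1] first."""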
    def __init__(self, mean, std, is_scale=True):
        super(Normalize, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale

    def __call__(self, im):
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            im = im / 255.0
        im -= self.mean
        im /= self.std
        return im


class Permute(object):
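    """Transpose the image from HWC to CHW, optionally swapping RGB to BGR."""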
    def __init__(self, to_bgr=False):
        self.to_bgr = to_bgr

    def __call__(self, im):
        im = im.transpose((2, 0, 1)).copy()
        if self.to_bgr:
            im = im[[2, 1, 0], :, :]
        return im


class PadStride(object):
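    """Zero-pad height and width up to the nearest multiple of `stride`
    (used e.g. by FPN-based models); a stride of 0 is a no-op.
    """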
    def __init__(self, stride=0):
        assert stride >= 0, "Unsupported stride: {}, the stride in PadStride must be greater than or equal to 0".format(
            stride)
        self.coarsest_stride = stride

    def __call__(self, im):
        coarsest_stride = self.coarsest_stride
        if coarsest_stride == 0:
            return im
        im_c, im_h, im_w = im.shape
        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = im
        return padding_im


def Preprocess(img_path, arch, config):
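    """Decode the image and apply the preprocessing ops listed in the exported
    config; each op's 'type' names one of the classes defined above. Returns
    the batched image array followed by the architecture-specific extra inputs.
    """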
    img = DecodeImage(img_path)
    orig_shape = img.shape
    scale = 1.
    data = []
    data_config = copy.deepcopy(config)
    for data_aug_conf in data_config:
        obj = data_aug_conf.pop('type')
        preprocess = eval(obj)(**data_aug_conf)
        if obj == 'Resize':
            img, scale = preprocess(img, arch)
        else:
            img = preprocess(img)

    img = img[np.newaxis, :]  # N, C, H, W
    data.append(img)
    extra_info = get_extra_info(img, arch, orig_shape, scale)
    data += extra_info
    return data


def infer():
    model_path = FLAGS.model_path
    config_path = FLAGS.config_path
    res = {}
    assert model_path is not None, "--model_path is required but was not given."
    assert config_path is not None, "--config_path is required but was not given."
    with open(config_path) as f:
        conf = yaml.safe_load(f)

    img_data = Preprocess(FLAGS.infer_img, conf['arch'], conf['Preprocess'])
    if 'SSD' in conf['arch']:
        img_data, res['im_shape'] = img_data
        img_data = [img_data]

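    # Two inference paths: the Python executor loads the saved program and is fed
    # a dict of numpy arrays, while the AnalysisConfig path uses the high-performance
    # predictor on PaddleTensor inputs, optionally with the TensorRT engine.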
    if conf['use_python_inference']:
        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        infer_prog, feed_var_names, fetch_targets = fluid.io.load_inference_model(
            dirname=model_path,
            executor=exe,
            model_filename='__model__',
            params_filename='__params__')
        data_dict = {k: v for k, v in zip(feed_var_names, img_data)}
    else:
        inputs = [fluid.core.PaddleTensor(d.copy()) for d in img_data]
        config = create_config(
            model_path,
            mode=conf['mode'],
            min_subgraph_size=conf['min_subgraph_size'])
        predict = fluid.core.create_paddle_predictor(config)

    logger.info('warmup...')
    for i in range(10):
        if conf['use_python_inference']:
            outs = exe.run(infer_prog,
                           feed=data_dict,
                           fetch_list=fetch_targets,
                           return_numpy=False)
        else:
            outs = predict.run(inputs)

    cnt = 100
    logger.info('run benchmark...')
    t1 = time.time()
    for i in range(cnt):
        if conf['use_python_inference']:
            outs = exe.run(infer_prog,
                           feed=data_dict,
                           fetch_list=fetch_targets,
                           return_numpy=False)
        else:
            outs = predict.run(inputs)
    t2 = time.time()

    ms = (t2 - t1) * 1000.0 / float(cnt)

    print("Inference: {} ms per batch image".format(ms))

    if FLAGS.visualize:
        eval_cls = eval_clses[conf['metric']]

        with_background = conf['arch'] != 'YOLO'
        clsid2catid, catid2name = eval_cls.get_category_info(
            None, with_background, True)

        is_bbox_normalized = 'SSD' in conf['arch']

        out = outs[-1]
        lod = out.lod() if conf['use_python_inference'] else out.lod
        lengths = offset_to_lengths(lod)
        np_data = np.array(out) if conf[
            'use_python_inference'] else out.as_ndarray()

        res['bbox'] = (np_data, lengths)
        res['im_id'] = np.array([[0]])

        bbox_results = eval_cls.bbox2out([res], clsid2catid, is_bbox_normalized)

        image = Image.open(FLAGS.infer_img).convert('RGB')
        image = draw_bbox(image, 0, catid2name, bbox_results, 0.5)
        image_path = os.path.split(FLAGS.infer_img)[-1]
        if not os.path.exists(FLAGS.output_dir):
            os.makedirs(FLAGS.output_dir)
        out_path = os.path.join(FLAGS.output_dir, image_path)
        image.save(out_path, quality=95)


if __name__ == '__main__':
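    # Example invocation (file and directory names are illustrative):
    #   python cpp_infer.py --model_path=inference_model --config_path=infer_cfg.yml \
    #       --infer_img=demo.jpg --visualize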
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--model_path",
        type=str,
        default=None,
        help="Directory of the exported model (__model__ and __params__).")
    parser.add_argument(
        "--config_path",
        type=str,
        default=None,
        help="Path to the preprocess config file (YAML).")
    parser.add_argument(
        "--infer_img", type=str, default=None, help="Path to the inference image.")
    parser.add_argument(
        "--visualize",
        action='store_true',
        default=False,
        help="Whether to visualize detection output")
    parser.add_argument(
        "--output_dir",
        type=str,
        default="output",
        help="Directory for storing the output visualization files.")
    FLAGS = parser.parse_args()
    infer()