diff --git a/demo/infer_cfg.yml b/demo/infer_cfg.yml
new file mode 100644
index 0000000000000000000000000000000000000000..99f1d63fa6b3159924ac781f488e9d12ee1b2192
--- /dev/null
+++ b/demo/infer_cfg.yml
@@ -0,0 +1,48 @@
+draw_threshold: 0.5
+use_python_inference: false
+mode: fluid
+metric: VOC
+arch: YOLO
+min_subgraph_size: 3
+with_background: false
+Preprocess:
+- interp: 2
+  max_size: 0
+  target_size: 608
+  type: Resize
+  use_cv2: true
+- is_channel_first: false
+  is_scale: true
+  mean:
+  - 0.485
+  - 0.456
+  - 0.406
+  std:
+  - 0.229
+  - 0.224
+  - 0.225
+  type: Normalize
+- channel_first: true
+  to_bgr: false
+  type: Permute
+label_list:
+- aeroplane
+- bicycle
+- bird
+- boat
+- bottle
+- bus
+- car
+- cat
+- chair
+- cow
+- diningtable
+- dog
+- horse
+- motorbike
+- person
+- pottedplant
+- sheep
+- sofa
+- train
+- tvmonitor
diff --git a/docs/advanced_tutorials/inference/EXPORT_MODEL.md b/docs/advanced_tutorials/inference/EXPORT_MODEL.md
index 8cd73a4bffc03c384c8c9540593cc02293110d33..47c074e7ff595f262c7ad721401f8d0064e10d4f 100644
--- a/docs/advanced_tutorials/inference/EXPORT_MODEL.md
+++ b/docs/advanced_tutorials/inference/EXPORT_MODEL.md
@@ -1,7 +1,6 @@
 # 模型导出
 
-训练得到一个满足要求的模型后,如果想要将该模型接入到C++预测库或者Serving服务,需要通过`tools/export_model.py`导出该模型。
-
+训练得到一个满足要求的模型后,如果想要将该模型接入到C++预测库或者Serving服务,需要通过`tools/export_model.py`导出该模型。同时,会导出预测时使用的配置文件,路径与模型保存路径相同, 配置文件名为`infer_cfg.yml`。
 **说明:**
 
 - 导出模型输入为网络输入图像,即原始图片经过预处理后的图像,具体预处理方式可参考配置文件中TestReader部分。各类检测模型的输入格式分别为:
@@ -57,6 +56,6 @@ python tools/export_model.py -c configs/yolov3_darknet.yml \
 # 导出SSD模型,输入是3x300x300
 python tools/export_model.py -c configs/ssd/ssd_mobilenet_v1_voc.yml \
         --output_dir=./inference_model \
-        -o weights= https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar \
+        -o weights=https://paddlemodels.bj.bcebos.com/object_detection/ssd_mobilenet_v1_voc.tar \
            TestReader.inputs_def.image_shape=[3,300,300]
 ```
diff --git a/docs/advanced_tutorials/inference/INFERENCE.md b/docs/advanced_tutorials/inference/INFERENCE.md
index c97db89d083b1f6b64bd478c1c8fd9991cbc8bf1..36505f98831d688a01db9846bb924bd1ef757b16 100644
--- a/docs/advanced_tutorials/inference/INFERENCE.md
+++ b/docs/advanced_tutorials/inference/INFERENCE.md
@@ -22,9 +22,10 @@ python tools/cpp_infer.py --model_path=inference_model/faster_rcnn_r50_1x/ --con
 - config_path: 参数配置、数据预处理配置文件,注意不是训练时的配置文件
 - infer_img: 待预测图片
 - visualize: 是否保存可视化结果,默认保存路径为```output/```
+- dump_result: 是否保存预测结果,保存格式为json文件,默认保存路径为```output/```
 
 
-更多参数可在```tools/cpp_demo.yml```中查看,主要参数:
+更多参数可在```demo/infer_cfg.yml```中查看,主要参数:
 
 - use_python_inference:
 
diff --git a/ppdet/core/config/yaml_helpers.py b/ppdet/core/config/yaml_helpers.py
index 8a7738b47f4f86acde78ab8a3bcac590d61615fa..1545b6be72288a1faa82b662d3c409aa8fc84897 100644
--- a/ppdet/core/config/yaml_helpers.py
+++ b/ppdet/core/config/yaml_helpers.py
@@ -21,6 +21,17 @@ from .schema import SharedConfig
 __all__ = ['serializable', 'Callable']
 
 
+def represent_dictionary_order(self, dict_data):
+    """Represent dict_data as a plain YAML map built from its items()."""
+    return self.represent_mapping('tag:yaml.org,2002:map', dict_data.items())
+
+
+def setup_orderdict():
+    """Register represent_dictionary_order as the OrderedDict representer."""
+    from collections import OrderedDict
+    yaml.add_representer(OrderedDict, represent_dictionary_order)
+
+
 def _make_python_constructor(cls):
     def python_constructor(loader, node):
         if isinstance(node, yaml.SequenceNode):
diff --git a/tools/cpp_demo.yml b/tools/cpp_demo.yml
deleted file mode 100644
index 7fac69840da252734156e9f47d75190a24e6be06..0000000000000000000000000000000000000000
--- a/tools/cpp_demo.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-# demo for cpp_infer.py
-
-use_python_inference: true # whether to use python inference
-mode: fluid # trt_fp32, trt_fp16, trt_int8, fluid
-arch: RCNN # YOLO, SSD, RCNN, RetinaNet
-min_subgraph_size: 40 # need 3 for YOLO arch
-
-# visualize the predicted image
-metric: COCO # COCO, VOC
-draw_threshold: 0.5
-
-Preprocess:
-- type: Resize
-  target_size: 640
-  max_size: 640
-- type: Normalize
-  mean:
-  - 0.485
-  - 0.456
-  - 0.406
-  std:
-  - 0.229
-  - 0.224
-  - 0.225
-  is_scale: True
-- type: Permute
-  to_bgr: False
-- type: PadStride
-  stride: 0 # set 32 on FPN and 128 on RetinaNet
diff --git a/tools/cpp_infer.py b/tools/cpp_infer.py
index c7232897fbf45f746e43a778eb79d3039766c7ee..2dd2cce44a84a16086974dd2b1c200145fca5ee0 100644
--- a/tools/cpp_infer.py
+++ b/tools/cpp_infer.py
@@ -2,15 +2,11 @@ import os
 import time
 
 import numpy as np
-from PIL import Image
+from PIL import Image, ImageDraw
 
 import paddle.fluid as fluid
 
 import argparse
-from ppdet.utils.visualizer import visualize_results, draw_bbox
-from ppdet.utils.eval_utils import eval_results
-import ppdet.utils.voc_eval as voc_eval
-import ppdet.utils.coco_eval as coco_eval
 import cv2
 import yaml
 import copy
@@ -20,8 +16,6 @@ FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT)
 logger = logging.getLogger(__name__)
 
-eval_clses = {'COCO': coco_eval, 'VOC': voc_eval}
-
 precision_map = {
     'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
     'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
@@ -34,6 +28,8 @@ def create_config(model_path, mode='fluid', batch_size=1, min_subgraph_size=3):
     params_file = os.path.join(model_path, '__params__')
     config = fluid.core.AnalysisConfig(model_file, params_file)
     config.enable_use_gpu(100, 0)
+    config.switch_use_feed_fetch_ops(False)
+    config.switch_specify_input_names(True)
     logger.info('min_subgraph_size = %d.' % (min_subgraph_size))
 
     if mode in precision_map.keys():
@@ -60,6 +56,8 @@ def offset_to_lengths(lod):
 
 
 def DecodeImage(im_path):
+    assert os.path.exists(im_path), "Image path {} can not be found".format(
+        im_path)
     with open(im_path, 'rb') as f:
         im = f.read()
     data = np.frombuffer(im, dtype='uint8')
@@ -102,17 +100,21 @@ def get_extra_info(im, arch, shape, scale):
 
 
 class Resize(object):
-    def __init__(self, target_size, max_size=0, interp=cv2.INTER_LINEAR):
+    def __init__(self,
+                 target_size,
+                 max_size=0,
+                 interp=cv2.INTER_LINEAR,
+                 use_cv2=True):
         super(Resize, self).__init__()
         self.target_size = target_size
         self.max_size = max_size
         self.interp = interp
+        self.use_cv2 = use_cv2
 
-    def __call__(self, im, arch):
+    def __call__(self, im, use_trt=False):
         origin_shape = im.shape[:2]
         im_c = im.shape[2]
-        scale_set = {'RCNN', 'RetinaNet'}
-        if self.max_size != 0 and arch in scale_set:
+        if self.max_size != 0:
             im_size_min = np.min(origin_shape[0:2])
             im_size_max = np.max(origin_shape[0:2])
             im_scale = float(self.target_size) / float(im_size_min)
@@ -125,15 +127,30 @@ class Resize(object):
         else:
             im_scale_x = float(self.target_size) / float(origin_shape[1])
             im_scale_y = float(self.target_size) / float(origin_shape[0])
-        im = cv2.resize(
-            im,
-            None,
-            None,
-            fx=im_scale_x,
-            fy=im_scale_y,
-            interpolation=self.interp)
+            resize_w = self.target_size
+            resize_h = self.target_size
+        if self.use_cv2:
+            im = cv2.resize(
+                im,
+                None,
+                None,
+                fx=im_scale_x,
+                fy=im_scale_y,
+                interpolation=self.interp)
+        else:
+            if self.max_size != 0:
+                raise TypeError(
+                    'If you set max_size to cap the maximum size of image, '
+                    'please set use_cv2 to True to resize the image.')
+            im = im.astype('uint8')
+            im = Image.fromarray(im)
+            im = im.resize((int(resize_w), int(resize_h)), self.interp)
+            im = np.array(im)
         # padding im
-        if self.max_size != 0 and arch in scale_set:
+        if self.max_size != 0 and use_trt:
+            logger.warning('Due to the limitation of tensorRT, padding the '
+                           'image shape to {} * {}'.format(self.max_size,
+                                                           self.max_size))
             padding_im = np.zeros(
                 (self.max_size, self.max_size, im_c), dtype=np.float32)
             im_h, im_w = im.shape[:2]
@@ -143,27 +160,36 @@ class Resize(object):
 
 
 class Normalize(object):
-    def __init__(self, mean, std, is_scale=True):
+    def __init__(self, mean, std, is_scale=True, is_channel_first=False):
         super(Normalize, self).__init__()
         self.mean = mean
         self.std = std
         self.is_scale = is_scale
+        self.is_channel_first = is_channel_first
 
     def __call__(self, im):
         im = im.astype(np.float32, copy=False)
+        if self.is_channel_first:
+            mean = np.array(self.mean)[:, np.newaxis, np.newaxis]
+            std = np.array(self.std)[:, np.newaxis, np.newaxis]
+        else:
+            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
+            std = np.array(self.std)[np.newaxis, np.newaxis, :]
         if self.is_scale:
             im = im / 255.0
-        im -= self.mean
-        im /= self.std
+        im -= mean
+        im /= std
         return im
 
 
 class Permute(object):
-    def __init__(self, to_bgr=False):
+    def __init__(self, to_bgr=False, channel_first=True):
         self.to_bgr = to_bgr
+        self.channel_first = channel_first
 
     def __call__(self, im):
-        im = im.transpose((2, 0, 1)).copy()
+        if self.channel_first:
+            im = im.transpose((2, 0, 1)).copy()
         if self.to_bgr:
             im = im[[2, 1, 0], :, :]
         return im
@@ -171,8 +197,9 @@ class Permute(object):
 
 class PadStride(object):
     def __init__(self, stride=0):
-        assert stride >= 0, "Unsupported stride: {}, the stride in PadStride must be greater or equal to 0".format(
-            stride)
+        assert stride >= 0, ("Unsupported stride: {},"
+                             " the stride in PadStride must be greater "
+                             "or equal to 0".format(stride))
         self.coarsest_stride = stride
 
     def __call__(self, im):
@@ -187,7 +214,7 @@ class PadStride(object):
         return padding_im
 
 
-def Preprocess(img_path, arch, config):
+def Preprocess(img_path, arch, config, use_trt):
     img = DecodeImage(img_path)
     orig_shape = img.shape
     scale = 1.
@@ -197,7 +224,7 @@ def Preprocess(img_path, arch, config):
         obj = data_aug_conf.pop('type')
         preprocess = eval(obj)(**data_aug_conf)
         if obj == 'Resize':
-            img, scale = preprocess(img, arch)
+            img, scale = preprocess(img, use_trt)
         else:
             img = preprocess(img)
 
@@ -208,6 +235,278 @@ def Preprocess(img_path, arch, config):
     return data
 
 
+def get_category_info(with_background, label_list):
+    """Return (clsid2catid, catid2name) for label_list. May insert
+    'background' into label_list in place when with_background is set."""
+    if label_list[0] != 'background' and with_background:
+        label_list.insert(0, 'background')
+    if label_list[0] == 'background' and not with_background:
+        label_list = label_list[1:]
+    clsid2catid = {i: i for i in range(len(label_list))}
+    catid2name = {i: name for i, name in enumerate(label_list)}
+    return clsid2catid, catid2name
+
+
+def bbox2out(results, clsid2catid, is_bbox_normalized=False):
+    """
+    Args:
+        results: request a dict, should include: `bbox`, `im_id`,
+                 if is_bbox_normalized=True, also need `im_shape`.
+        clsid2catid: class id to category id map of COCO2017 dataset.
+        is_bbox_normalized: whether or not bbox is normalized.
+    """
+    xywh_res = []
+    for t in results:
+        bboxes = t['bbox'][0]
+        lengths = t['bbox'][1][0]
+        if bboxes is None or bboxes.shape == (1, 1):
+            continue
+
+        k = 0
+        for i in range(len(lengths)):
+            num = lengths[i]
+            for j in range(num):
+                dt = bboxes[k]
+                clsid, score, xmin, ymin, xmax, ymax = dt.tolist()
+                catid = (clsid2catid[int(clsid)])
+
+                if is_bbox_normalized:
+                    # clamp to [0, 1] (no clip_bbox is visible in this file)
+                    xmin, ymin, xmax, ymax = np.clip(
+                        [xmin, ymin, xmax, ymax], 0., 1.).tolist()
+                    w = xmax - xmin
+                    h = ymax - ymin
+                    im_shape = t['im_shape'][0][i].tolist()
+                    im_height, im_width = int(im_shape[0]), int(im_shape[1])
+                    xmin *= im_width
+                    ymin *= im_height
+                    w *= im_width
+                    h *= im_height
+                else:
+                    w = xmax - xmin + 1
+                    h = ymax - ymin + 1
+
+                bbox = [xmin, ymin, w, h]
+                coco_res = {'category_id': catid, 'bbox': bbox, 'score': score}
+                xywh_res.append(coco_res)
+                k += 1
+    return xywh_res
+
+
+def expand_boxes(boxes, scale):
+    """
+    Expand an array of boxes by a given scale.
+    """
+    w_half = (boxes[:, 2] - boxes[:, 0]) * .5
+    h_half = (boxes[:, 3] - boxes[:, 1]) * .5
+    x_c = (boxes[:, 2] + boxes[:, 0]) * .5
+    y_c = (boxes[:, 3] + boxes[:, 1]) * .5
+
+    w_half *= scale
+    h_half *= scale
+
+    boxes_exp = np.zeros(boxes.shape)
+    boxes_exp[:, 0] = x_c - w_half
+    boxes_exp[:, 2] = x_c + w_half
+    boxes_exp[:, 1] = y_c - h_half
+    boxes_exp[:, 3] = y_c + h_half
+
+    return boxes_exp
+
+
+def mask2out(results, clsid2catid, resolution, thresh_binarize=0.5):
+    """Paste each predicted mask into its image and return a list of dicts
+    with 'category_id', 'segmentation' (pycocotools-encoded) and 'score'."""
+    import pycocotools.mask as mask_util
+    scale = (resolution + 2.0) / resolution
+
+    segm_res = []
+
+    for t in results:
+        bboxes = t['bbox'][0]
+        lengths = t['bbox'][1][0]
+        if bboxes is None or bboxes.shape == (1, 1):
+            continue
+        if len(bboxes.tolist()) == 0:
+            continue
+        masks = t['mask'][0]
+
+        s = 0
+        # for each sample
+        for i in range(len(lengths)):
+            num = lengths[i]
+            im_shape = t['im_shape'][i]
+
+            bbox = bboxes[s:s + num][:, 2:]
+            clsid_scores = bboxes[s:s + num][:, 0:2]
+            mask = masks[s:s + num]
+            s += num
+
+            im_h = int(im_shape[0])
+            im_w = int(im_shape[1])
+
+            expand_bbox = expand_boxes(bbox, scale)
+            expand_bbox = expand_bbox.astype(np.int32)
+
+            padded_mask = np.zeros(
+                (resolution + 2, resolution + 2), dtype=np.float32)
+
+            for j in range(num):
+                xmin, ymin, xmax, ymax = expand_bbox[j].tolist()
+                clsid, score = clsid_scores[j].tolist()
+                clsid = int(clsid)
+                padded_mask[1:-1, 1:-1] = mask[j, clsid, :, :]
+
+                catid = clsid2catid[clsid]
+
+                w = xmax - xmin + 1
+                h = ymax - ymin + 1
+                w = np.maximum(w, 1)
+                h = np.maximum(h, 1)
+
+                resized_mask = cv2.resize(padded_mask, (w, h))
+                resized_mask = np.array(
+                    resized_mask > thresh_binarize, dtype=np.uint8)
+                im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
+
+                x0 = min(max(xmin, 0), im_w)
+                x1 = min(max(xmax + 1, 0), im_w)
+                y0 = min(max(ymin, 0), im_h)
+                y1 = min(max(ymax + 1, 0), im_h)
+
+                im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
+                    x0 - xmin):(x1 - xmin)]
+                segm = mask_util.encode(
+                    np.array(
+                        im_mask[:, :, np.newaxis], order='F'))[0]
+                catid = clsid2catid[clsid]
+                segm['counts'] = segm['counts'].decode('utf8')
+                coco_res = {
+                    'category_id': catid,
+                    'segmentation': segm,
+                    'score': score
+                }
+                segm_res.append(coco_res)
+    return segm_res
+
+
+def color_map(num_classes):
+    """Return a (num_classes, 3) array with one color per class index."""
+    color_map = num_classes * [0, 0, 0]
+    for i in range(0, num_classes):
+        j = 0
+        lab = i
+        while lab:
+            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
+            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
+            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
+            j += 1
+            lab >>= 3
+    color_map = np.array(color_map).reshape(-1, 3)
+    return color_map
+
+
+def draw_bbox(image, catid2name, bboxes, threshold, color_list):
+    """
+    draw bbox on image
+    """
+    draw = ImageDraw.Draw(image)
+
+    for dt in np.array(bboxes):
+        catid, bbox, score = dt['category_id'], dt['bbox'], dt['score']
+        if score < threshold:
+            continue
+
+        xmin, ymin, w, h = bbox
+        xmax = xmin + w
+        ymax = ymin + h
+
+        color = tuple(color_list[catid])
+
+        # draw bbox
+        draw.line(
+            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
+             (xmin, ymin)],
+            width=2,
+            fill=color)
+
+        # draw label
+        text = "{} {:.2f}".format(catid2name[catid], score)
+        tw, th = draw.textsize(text)
+        draw.rectangle(
+            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
+        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
+
+    return image
+
+
+def draw_mask(image, masks, threshold, color_list, alpha=0.7):
+    """
+    Draw mask on image
+    """
+    mask_color_id = 0
+    w_ratio = .4
+    img_array = np.array(image).astype('float32')
+    for dt in np.array(masks):
+        segm, score = dt['segmentation'], dt['score']
+        if score < threshold:
+            continue
+        import pycocotools.mask as mask_util
+        mask = mask_util.decode(segm) * 255
+        color_mask = color_list[mask_color_id % len(color_list), 0:3]
+        mask_color_id += 1
+        for c in range(3):
+            color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255
+        idx = np.nonzero(mask)
+        img_array[idx[0], idx[1], :] *= 1.0 - alpha
+        img_array[idx[0], idx[1], :] += alpha * color_mask
+    return Image.fromarray(img_array.astype('uint8'))
+
+
+def get_bbox_result(output, result, conf, clsid2catid):
+    """Store the bbox output in `result` and convert it with bbox2out."""
+    is_bbox_normalized = True if 'SSD' in conf['arch'] else False
+    lengths = offset_to_lengths(output.lod())
+    np_data = np.array(output) if conf[
+        'use_python_inference'] else output.copy_to_cpu()
+    result['bbox'] = (np_data, lengths)
+    result['im_id'] = np.array([[0]])
+
+    bbox_results = bbox2out([result], clsid2catid, is_bbox_normalized)
+    return bbox_results
+
+
+def get_mask_result(output, result, conf, clsid2catid):
+    """Store bbox/mask outputs in `result` and convert them with mask2out."""
+    resolution = conf['mask_resolution']
+    bbox_out, mask_out = output
+    lengths = offset_to_lengths(bbox_out.lod())
+    bbox = np.array(bbox_out) if conf[
+        'use_python_inference'] else bbox_out.copy_to_cpu()
+    mask = np.array(mask_out) if conf[
+        'use_python_inference'] else mask_out.copy_to_cpu()
+    result['bbox'] = (bbox, lengths)
+    result['mask'] = (mask, lengths)
+    mask_results = mask2out([result], clsid2catid, resolution)
+    return mask_results
+
+
+def visualize(bbox_results, catid2name, num_classes, mask_results=None):
+    """Draw boxes (and masks, if given) on FLAGS.infer_img and save the
+    image under FLAGS.output_dir, creating that directory if needed."""
+    image = Image.open(FLAGS.infer_img).convert('RGB')
+    color_list = color_map(num_classes)
+    image = draw_bbox(image, catid2name, bbox_results, 0.5, color_list)
+    if mask_results is not None:
+        image = draw_mask(image, mask_results, 0.5, color_list)
+    image_path = os.path.split(FLAGS.infer_img)[-1]
+    if not os.path.exists(FLAGS.output_dir):
+        os.makedirs(FLAGS.output_dir)
+    out_path = os.path.join(FLAGS.output_dir, image_path)
+    image.save(out_path, quality=95)
+    logger.info('Save visualize result to {}'.format(out_path))
+
+
 def infer():
     model_path = FLAGS.model_path
     config_path = FLAGS.config_path
@@ -219,7 +518,9 @@ def infer():
     with open(config_path) as f:
         conf = yaml.safe_load(f)
 
-    img_data = Preprocess(FLAGS.infer_img, conf['arch'], conf['Preprocess'])
+    use_trt = not conf['use_python_inference'] and 'trt' in conf['mode']
+    img_data = Preprocess(FLAGS.infer_img, conf['arch'], conf['Preprocess'],
+                          use_trt)
     if 'SSD' in conf['arch']:
         img_data, res['im_shape'] = img_data
         img_data = [img_data]
@@ -234,12 +535,15 @@ def infer():
             params_filename='__params__')
         data_dict = {k: v for k, v in zip(feed_var_names, img_data)}
     else:
-        inputs = [fluid.core.PaddleTensor(d.copy()) for d in img_data]
         config = create_config(
             model_path,
             mode=conf['mode'],
             min_subgraph_size=conf['min_subgraph_size'])
         predict = fluid.core.create_paddle_predictor(config)
+        input_names = predict.get_input_names()
+        for ind, d in enumerate(img_data):
+            input_tensor = predict.get_input_tensor(input_names[ind])
+            input_tensor.copy_from_cpu(d.copy())
 
     logger.info('warmup...')
     for i in range(10):
@@ -249,7 +553,7 @@ def infer():
                            fetch_list=fetch_targets,
                            return_numpy=False)
         else:
-            outs = predict.run(inputs)
+            predict.zero_copy_run()
 
     cnt = 100
     logger.info('run benchmark...')
@@ -261,40 +565,42 @@ def infer():
                            fetch_list=fetch_targets,
                            return_numpy=False)
         else:
-            outs = predict.run(inputs)
+            outs = []
+            predict.zero_copy_run()
+            output_names = predict.get_output_names()
+            for o_name in output_names:
+                outs.append(predict.get_output_tensor(o_name))
     t2 = time.time()
 
     ms = (t2 - t1) * 1000.0 / float(cnt)
 
     print("Inference: {} ms per batch image".format(ms))
 
-    if FLAGS.visualize:
-        eval_cls = eval_clses[conf['metric']]
-
-        with_background = conf['arch'] != 'YOLO'
-        clsid2catid, catid2name = eval_cls.get_category_info(
-            None, with_background, True)
+    clsid2catid, catid2name = get_category_info(conf['with_background'],
+                                                conf['label_list'])
+    bbox_result = get_bbox_result(outs[0], res, conf, clsid2catid)
 
-        is_bbox_normalized = True if 'SSD' in conf['arch'] else False
+    mask_result = None
+    if 'mask_resolution' in conf:
+        res['im_shape'] = img_data[-1]
+        mask_result = get_mask_result(outs, res, conf, clsid2catid)
 
-        out = outs[-1]
-        lod = out.lod() if conf['use_python_inference'] else out.lod
-        lengths = offset_to_lengths(lod)
-        np_data = np.array(out) if conf[
-            'use_python_inference'] else out.as_ndarray()
-
-        res['bbox'] = (np_data, lengths)
-        res['im_id'] = np.array([[0]])
-
-        bbox_results = eval_cls.bbox2out([res], clsid2catid, is_bbox_normalized)
+    if FLAGS.visualize:
+        visualize(bbox_result, catid2name, len(conf['label_list']), mask_result)
 
-        image = Image.open(FLAGS.infer_img).convert('RGB')
-        image = draw_bbox(image, 0, catid2name, bbox_results, 0.5)
-        image_path = os.path.split(FLAGS.infer_img)[-1]
-        if not os.path.exists(FLAGS.output_dir):
-            os.makedirs(FLAGS.output_dir)
-        out_path = os.path.join(FLAGS.output_dir, image_path)
-        image.save(out_path, quality=95)
+    if FLAGS.dump_result:
+        import json
+        if not os.path.exists(FLAGS.output_dir):
+            os.makedirs(FLAGS.output_dir)
+        bbox_file = os.path.join(FLAGS.output_dir, 'bbox.json')
+        logger.info('dump bbox to {}'.format(bbox_file))
+        with open(bbox_file, 'w') as f:
+            json.dump(bbox_result, f)
+        if mask_result is not None:
+            mask_file = os.path.join(FLAGS.output_dir, 'mask.json')
+            logger.info('dump mask to {}'.format(mask_file))
+            with open(mask_file, 'w') as f:
+                json.dump(mask_result, f)
 
 
 if __name__ == '__main__':
@@ -315,5 +621,10 @@ if __name__ == '__main__':
         type=str,
         default="output",
         help="Directory for storing the output visualization files.")
+    parser.add_argument(
+        "--dump_result",
+        action='store_true',
+        default=False,
+        help="Whether to dump result")
     FLAGS = parser.parse_args()
     infer()
diff --git a/tools/export_model.py b/tools/export_model.py
index 9e31aca5390ca7e76e12d90b91e57c57264e038b..e2e166f91066388e8f5fe296d3257f25f7f52822 100644
--- a/tools/export_model.py
+++ b/tools/export_model.py
@@ -23,13 +23,92 @@ from paddle import fluid
 from ppdet.core.workspace import load_config, merge_config, create
 from ppdet.utils.cli import ArgsParser
 import ppdet.utils.checkpoint as checkpoint
-
+import yaml
 import logging
+from collections import OrderedDict
 FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
 logging.basicConfig(level=logging.INFO, format=FORMAT)
 logger = logging.getLogger(__name__)
 
 
+def parse_reader(reader_cfg, metric, arch):
+    """Return (with_background, preprocess_list, label_list) derived from
+    reader_cfg for the exported inference config."""
+    preprocess_list = []
+
+    image_shape = reader_cfg['inputs_def'].get('image_shape', [None])
+    has_shape_def = None not in image_shape
+    scale_set = {'RCNN', 'RetinaNet'}
+
+    dataset = reader_cfg['dataset']
+    anno_file = dataset.get_anno()
+    with_background = dataset.with_background
+    use_default_label = dataset.use_default_label
+
+    if metric == 'COCO':
+        from ppdet.utils.coco_eval import get_category_info
+    if metric == "VOC":
+        from ppdet.utils.voc_eval import get_category_info
+    clsid2catid, catid2name = get_category_info(anno_file, with_background,
+                                                use_default_label)
+    label_list = [str(cat) for cat in catid2name.values()]
+
+    sample_transforms = reader_cfg['sample_transforms']
+    for st in sample_transforms[1:]:
+        method = st.__class__.__name__
+        p = {'type': method.replace('Image', '')}
+        params = st.__dict__
+        params.pop('_id')
+        if p['type'] == 'Resize' and has_shape_def:
+            params['target_size'] = image_shape[1]
+            params['max_size'] = image_shape[2] if arch in scale_set else 0
+
+        p.update(params)
+        preprocess_list.append(p)
+    batch_transforms = reader_cfg.get('batch_transforms', None)
+    if batch_transforms:
+        for bt in batch_transforms:
+            method = bt.__class__.__name__
+            if method == 'PadBatch':
+                preprocess_list.append({'type': 'PadStride'})
+                params = bt.__dict__
+                preprocess_list[-1].update({'stride': params['pad_to_stride']})
+                break
+
+    return with_background, preprocess_list, label_list
+
+
+def dump_infer_config(config):
+    """Write infer_cfg.yml for `config` under FLAGS.output_dir/<config name>."""
+    cfg_name = os.path.basename(FLAGS.config).split('.')[0]
+    save_dir = os.path.join(FLAGS.output_dir, cfg_name)
+    from ppdet.core.config.yaml_helpers import setup_orderdict
+    setup_orderdict()
+    infer_cfg = OrderedDict({
+        'use_python_inference': False,
+        'mode': 'fluid',
+        'draw_threshold': 0.5,
+        'metric': config['metric']
+    })
+    trt_min_subgraph = {'YOLO': 3, 'SSD': 40, 'RCNN': 40, 'RetinaNet': 40}
+    infer_arch = config['architecture']
+
+    for arch, min_subgraph_size in trt_min_subgraph.items():
+        if arch in infer_arch:
+            infer_cfg['arch'] = arch
+            infer_cfg['min_subgraph_size'] = min_subgraph_size
+            break
+
+    if 'Mask' in config['architecture']:
+        infer_cfg['mask_resolution'] = config['MaskHead']['resolution']
+    infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[
+        'label_list'] = parse_reader(config['TestReader'], config['metric'],
+                                     infer_cfg['arch'])
+    with open(os.path.join(save_dir, 'infer_cfg.yml'), 'w') as f:
+        yaml.dump(infer_cfg, f)
+    logger.info("Export inference config file to {}".format(f.name))
+
+
 def prune_feed_vars(feeded_var_names, target_vars, prog):
     """
     Filter out feed variables which are not in program,
@@ -57,7 +136,8 @@ def save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog):
     cfg_name = os.path.basename(FLAGS.config).split('.')[0]
     save_dir = os.path.join(FLAGS.output_dir, cfg_name)
     feed_var_names = [var.name for var in feed_vars.values()]
-    target_vars = list(test_fetches.values())
+    fetch_list = sorted(test_fetches.items(), key=lambda i: i[0])
+    target_vars = [var[1] for var in fetch_list]
     feed_var_names = prune_feed_vars(feed_var_names, target_vars, infer_prog)
     logger.info("Export inference model to {}, input: {}, output: "
                 "{}...".format(save_dir, feed_var_names,
@@ -101,6 +181,7 @@ def main():
         checkpoint.load_params(exe, infer_prog, cfg.weights)
 
     save_infer_model(FLAGS, exe, feed_vars, test_fetches, infer_prog)
+    dump_infer_config(cfg)
 
 
 if __name__ == '__main__':
@@ -110,5 +191,6 @@ if __name__ == '__main__':
         type=str,
         default="output",
         help="Directory for storing the output model files.")
+
     FLAGS = parser.parse_args()
     main()