# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import numpy as np import xml.etree.ElementTree as ET def get_roidb(anno_path, sample_num=-1, cname2cid=None, with_background=True): """ Load VOC records with annotations in xml directory 'anno_path' Notes: ${anno_path} must contains xml file and image file path for annotations Args: anno_path (str): root directory for voc annotation data sample_num (int): number of samples to load, -1 means all cname2cid (dict): the label name to id dictionary with_background (bool): whether load background as a class. if True, total class number will be 81. default True Returns: (records, catname2clsid) 'records' is list of dict whose structure is: { 'im_file': im_fname, # image file name 'im_id': im_id, # image id 'h': im_h, # height of image 'w': im_w, # width 'is_crowd': is_crowd, 'gt_class': gt_class, 'gt_bbox': gt_bbox, 'gt_poly': gt_poly, } 'cname2id' is a dict to map category name to class id """ data_dir = os.path.dirname(anno_path) records = [] ct = 0 existence = False if cname2cid is None else True if cname2cid is None: cname2cid = {} # mapping category name to class id # background:0, first_class:1, second_class:2, ... with open(anno_path, 'r') as fr: while True: line = fr.readline() if not line: break img_file, xml_file = [os.path.join(data_dir, x) \ for x in line.strip().split()[:2]] if not os.path.isfile(xml_file): continue tree = ET.parse(xml_file) if tree.find('id') is None: im_id = np.array([ct]) else: im_id = np.array([int(tree.find('id').text)]) objs = tree.findall('object') im_w = float(tree.find('size').find('width').text) im_h = float(tree.find('size').find('height').text) gt_bbox = np.zeros((len(objs), 4), dtype=np.float32) gt_class = np.zeros((len(objs), 1), dtype=np.int32) gt_score = np.ones((len(objs), 1), dtype=np.float32) is_crowd = np.zeros((len(objs), 1), dtype=np.int32) difficult = np.zeros((len(objs), 1), dtype=np.int32) for i, obj in enumerate(objs): cname = obj.find('name').text if not existence and cname not in cname2cid: # the background's id is 0, so need to add 1. cname2cid[cname] = len(cname2cid) + int(with_background) elif existence and cname not in cname2cid: raise KeyError( 'Not found cname[%s] in cname2cid when map it to cid.' % (cname)) gt_class[i][0] = cname2cid[cname] _difficult = int(obj.find('difficult').text) x1 = float(obj.find('bndbox').find('xmin').text) y1 = float(obj.find('bndbox').find('ymin').text) x2 = float(obj.find('bndbox').find('xmax').text) y2 = float(obj.find('bndbox').find('ymax').text) x1 = max(0, x1) y1 = max(0, y1) x2 = min(im_w - 1, x2) y2 = min(im_h - 1, y2) gt_bbox[i] = [x1, y1, x2, y2] is_crowd[i][0] = 0 difficult[i][0] = _difficult voc_rec = { 'im_file': img_file, 'im_id': im_id, 'h': im_h, 'w': im_w, 'is_crowd': is_crowd, 'gt_class': gt_class, 'gt_score': gt_score, 'gt_bbox': gt_bbox, 'gt_poly': [], 'difficult': difficult } if len(objs) != 0: records.append(voc_rec) ct += 1 if sample_num > 0 and ct >= sample_num: break assert len(records) > 0, 'not found any voc record in %s' % (anno_path) return [records, cname2cid] def load(anno_path, sample_num=-1, use_default_label=True, with_background=True): """ Load VOC records with annotations in xml directory 'anno_path' Notes: ${anno_path} must contains xml file and image file path for annotations Args: @anno_path (str): root directory for voc annotation data @sample_num (int): number of samples to load, -1 means all @use_default_label (bool): whether use the default mapping of label to id @with_background (bool): whether load background as a class. if True, total class number will be 81. default True Returns: (records, catname2clsid) 'records' is list of dict whose structure is: { 'im_file': im_fname, # image file name 'im_id': im_id, # image id 'h': im_h, # height of image 'w': im_w, # width 'is_crowd': is_crowd, 'gt_class': gt_class, 'gt_bbox': gt_bbox, 'gt_poly': gt_poly, } 'cname2id' is a dict to map category name to class id """ data_dir = os.path.dirname(anno_path) # mapping category name to class id # if with_background is True: # background:0, first_class:1, second_class:2, ... # if with_background is False: # first_class:0, second_class:1, ... records = [] ct = 0 cname2cid = {} if not use_default_label: label_path = os.path.join(data_dir, 'label_list.txt') with open(label_path, 'r') as fr: label_id = int(with_background) for line in fr.readlines(): cname2cid[line.strip()] = label_id label_id += 1 else: cname2cid = pascalvoc_label(with_background) with open(anno_path, 'r') as fr: while True: line = fr.readline() if not line: break img_file, xml_file = [os.path.join(data_dir, x) \ for x in line.strip().split()[:2]] if not os.path.isfile(xml_file): continue tree = ET.parse(xml_file) if tree.find('id') is None: im_id = np.array([ct]) else: im_id = np.array([int(tree.find('id').text)]) objs = tree.findall('object') im_w = float(tree.find('size').find('width').text) im_h = float(tree.find('size').find('height').text) gt_bbox = np.zeros((len(objs), 4), dtype=np.float32) gt_class = np.zeros((len(objs), 1), dtype=np.int32) gt_score = np.ones((len(objs), 1), dtype=np.float32) is_crowd = np.zeros((len(objs), 1), dtype=np.int32) difficult = np.zeros((len(objs), 1), dtype=np.int32) for i, obj in enumerate(objs): cname = obj.find('name').text gt_class[i][0] = cname2cid[cname] _difficult = int(obj.find('difficult').text) x1 = float(obj.find('bndbox').find('xmin').text) y1 = float(obj.find('bndbox').find('ymin').text) x2 = float(obj.find('bndbox').find('xmax').text) y2 = float(obj.find('bndbox').find('ymax').text) x1 = max(0, x1) y1 = max(0, y1) x2 = min(im_w - 1, x2) y2 = min(im_h - 1, y2) gt_bbox[i] = [x1, y1, x2, y2] is_crowd[i][0] = 0 difficult[i][0] = _difficult voc_rec = { 'im_file': img_file, 'im_id': im_id, 'h': im_h, 'w': im_w, 'is_crowd': is_crowd, 'gt_class': gt_class, 'gt_score': gt_score, 'gt_bbox': gt_bbox, 'gt_poly': [], 'difficult': difficult } if len(objs) != 0: records.append(voc_rec) ct += 1 if sample_num > 0 and ct >= sample_num: break assert len(records) > 0, 'not found any voc record in %s' % (anno_path) return [records, cname2cid] def pascalvoc_label(with_background=True): labels_map = { 'aeroplane': 1, 'bicycle': 2, 'bird': 3, 'boat': 4, 'bottle': 5, 'bus': 6, 'car': 7, 'cat': 8, 'chair': 9, 'cow': 10, 'diningtable': 11, 'dog': 12, 'horse': 13, 'motorbike': 14, 'person': 15, 'pottedplant': 16, 'sheep': 17, 'sofa': 18, 'train': 19, 'tvmonitor': 20 } if not with_background: labels_map = {k: v - 1 for k, v in labels_map.items()} return labels_map