From d8801ec0423e3989673308fbfda69bf282398648 Mon Sep 17 00:00:00 2001
From: wangguanzhong
Date: Tue, 6 Apr 2021 15:33:20 +0800
Subject: [PATCH] update files (#2522)

* update files

* update for ci-travis
---
 .travis.yml                              |   2 +-
 ppdet/__init__.py                        |   2 +-
 ppdet/metrics/coco_utils.py              |   2 +-
 ppdet/{py_op => metrics}/post_process.py |   0
 ppdet/py_op/__init__.py                  |   1 -
 tools/x2coco.py                          | 449 +++++++++++++++++++++++
 6 files changed, 452 insertions(+), 4 deletions(-)
 rename ppdet/{py_op => metrics}/post_process.py (100%)
 delete mode 100644 ppdet/py_op/__init__.py
 create mode 100644 tools/x2coco.py

diff --git a/.travis.yml b/.travis.yml
index 91789d5f6..706e35f91 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -20,7 +20,7 @@ addons:
 before_install:
   - sudo pip install -U virtualenv pre-commit pip
   - docker pull paddlepaddle/paddle:latest
-  - git pull https://github.com/PaddlePaddle/PaddleDetection master
+  - git pull https://github.com/PaddlePaddle/PaddleDetection develop
 
 script:
   - exit_code=0
diff --git a/ppdet/__init__.py b/ppdet/__init__.py
index 1d5bd814b..56b687dd9 100644
--- a/ppdet/__init__.py
+++ b/ppdet/__init__.py
@@ -13,4 +13,4 @@
 # limitations under the License.
 
 from . import (core, data, engine, modeling, model_zoo, optimizer, metrics,
-               py_op, utils, slim)
+               utils, slim)
diff --git a/ppdet/metrics/coco_utils.py b/ppdet/metrics/coco_utils.py
index 9e45b1c2a..b2ae487e1 100644
--- a/ppdet/metrics/coco_utils.py
+++ b/ppdet/metrics/coco_utils.py
@@ -21,7 +21,7 @@ import sys
 import numpy as np
 import itertools
 
-from ppdet.py_op.post_process import get_det_res, get_seg_res, get_solov2_segm_res
+from ppdet.metrics.post_process import get_det_res, get_seg_res, get_solov2_segm_res
 from ppdet.metrics.map_utils import draw_pr_curve
 
 from ppdet.utils.logger import setup_logger
diff --git a/ppdet/py_op/post_process.py b/ppdet/metrics/post_process.py
similarity index 100%
rename from ppdet/py_op/post_process.py
rename to ppdet/metrics/post_process.py
diff --git a/ppdet/py_op/__init__.py b/ppdet/py_op/__init__.py
deleted file mode 100644
index d48118906..000000000
--- a/ppdet/py_op/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from .post_process import *
diff --git a/tools/x2coco.py b/tools/x2coco.py
new file mode 100644
index 000000000..ef2f0d717
--- /dev/null
+++ b/tools/x2coco.py
@@ -0,0 +1,449 @@
+#!/usr/bin/env python
+# coding: utf-8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import glob
+import json
+import os
+import os.path as osp
+import sys
+import shutil
+import xml.etree.ElementTree as ET
+from tqdm import tqdm
+import re
+
+import numpy as np
+import PIL.ImageDraw
+
+label_to_num = {}
+categories_list = []
+labels_list = []
+
+
+class MyEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, np.integer):
+            return int(obj)
+        elif isinstance(obj, np.floating):
+            return float(obj)
+        elif isinstance(obj, np.ndarray):
+            return obj.tolist()
+        else:
+            return super(MyEncoder, self).default(obj)
+
+
+def images_labelme(data, num):
+    image = {}
+    image['height'] = data['imageHeight']
+    image['width'] = data['imageWidth']
+    image['id'] = num + 1
+    if '\\' in data['imagePath']:
+        image['file_name'] = data['imagePath'].split('\\')[-1]
+    else:
+        image['file_name'] = data['imagePath'].split('/')[-1]
+    return image
+
+
+def images_cityscape(data, num, img_file):
+    image = {}
+    image['height'] = data['imgHeight']
+    image['width'] = data['imgWidth']
+    image['id'] = num + 1
+    image['file_name'] = img_file
+    return image
+
+
+def categories(label, labels_list):
+    category = {}
+    category['supercategory'] = 'component'
+    category['id'] = len(labels_list) + 1
+    category['name'] = label
+    return category
+
+
+def annotations_rectangle(points, label, image_num, object_num, label_to_num):
+    annotation = {}
+    seg_points = np.asarray(points).copy()
+    seg_points[1, :] = np.asarray(points)[2, :]
+    seg_points[2, :] = np.asarray(points)[1, :]
+    annotation['segmentation'] = [list(seg_points.flatten())]
+    annotation['iscrowd'] = 0
+    annotation['image_id'] = image_num + 1
+    annotation['bbox'] = list(
+        map(float, [
+            points[0][0], points[0][1], points[1][0] - points[0][0], points[1][
+                1] - points[0][1]
+        ]))
+    annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
+    annotation['category_id'] = label_to_num[label]
+    annotation['id'] = object_num + 1
+    return annotation
+
+
+def annotations_polygon(height, width, points, label, image_num, object_num,
+                        label_to_num):
+    annotation = {}
+    annotation['segmentation'] = [list(np.asarray(points).flatten())]
+    annotation['iscrowd'] = 0
+    annotation['image_id'] = image_num + 1
+    annotation['bbox'] = list(map(float, get_bbox(height, width, points)))
+    annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
+    annotation['category_id'] = label_to_num[label]
+    annotation['id'] = object_num + 1
+    return annotation
+
+
+def get_bbox(height, width, points):
+    polygons = points
+    mask = np.zeros([height, width], dtype=np.uint8)
+    mask = PIL.Image.fromarray(mask)
+    xy = list(map(tuple, polygons))
+    PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
+    mask = np.array(mask, dtype=bool)
+    index = np.argwhere(mask == 1)
+    rows = index[:, 0]
+    clos = index[:, 1]
+    left_top_r = np.min(rows)
+    left_top_c = np.min(clos)
+    right_bottom_r = np.max(rows)
+    right_bottom_c = np.max(clos)
+    return [
+        left_top_c, left_top_r, right_bottom_c - left_top_c,
+        right_bottom_r - left_top_r
+    ]
+
+
+def deal_json(ds_type, img_path, json_path):
+    data_coco = {}
+    images_list = []
+    annotations_list = []
+    image_num = -1
+    object_num = -1
+    for img_file in os.listdir(img_path):
+        img_label = os.path.splitext(img_file)[0]
+        if img_file.split('.')[
+                -1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
+            continue
+        label_file = osp.join(json_path, img_label + '.json')
+        print('Generating dataset from:', label_file)
+        image_num = image_num + 1
+        with open(label_file) as f:
+            data = json.load(f)
+            if ds_type == 'labelme':
+                images_list.append(images_labelme(data, image_num))
+            elif ds_type == 'cityscape':
+                images_list.append(images_cityscape(data, image_num, img_file))
+            if ds_type == 'labelme':
+                for shapes in data['shapes']:
+                    object_num = object_num + 1
+                    label = shapes['label']
+                    if label not in labels_list:
+                        categories_list.append(categories(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    p_type = shapes['shape_type']
+                    if p_type == 'polygon':
+                        points = shapes['points']
+                        annotations_list.append(
+                            annotations_polygon(data['imageHeight'], data[
+                                'imageWidth'], points, label, image_num,
+                                object_num, label_to_num))
+
+                    if p_type == 'rectangle':
+                        (x1, y1), (x2, y2) = shapes['points']
+                        x1, x2 = sorted([x1, x2])
+                        y1, y2 = sorted([y1, y2])
+                        points = [[x1, y1], [x2, y2], [x1, y2], [x2, y1]]
+                        annotations_list.append(
+                            annotations_rectangle(points, label, image_num,
+                                                  object_num, label_to_num))
+            elif ds_type == 'cityscape':
+                for shapes in data['objects']:
+                    object_num = object_num + 1
+                    label = shapes['label']
+                    if label not in labels_list:
+                        categories_list.append(categories(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    points = shapes['polygon']
+                    annotations_list.append(
+                        annotations_polygon(data['imgHeight'], data[
+                            'imgWidth'], points, label, image_num, object_num,
+                            label_to_num))
+    data_coco['images'] = images_list
+    data_coco['categories'] = categories_list
+    data_coco['annotations'] = annotations_list
+    return data_coco
+
+
+def voc_get_label_anno(ann_dir_path, ann_ids_path, labels_path):
+    with open(labels_path, 'r') as f:
+        labels_str = f.read().split()
+    labels_ids = list(range(1, len(labels_str) + 1))
+
+    with open(ann_ids_path, 'r') as f:
+        ann_ids = f.read().split()
+    ann_paths = []
+    for aid in ann_ids:
+        if aid.endswith('xml'):
+            ann_path = os.path.join(ann_dir_path, aid)
+        else:
+            ann_path = os.path.join(ann_dir_path, aid + '.xml')
+        ann_paths.append(ann_path)
+
+    return dict(zip(labels_str, labels_ids)), ann_paths
+
+
+def voc_get_image_info(annotation_root, im_id):
+    filename = annotation_root.findtext('filename')
+    assert filename is not None
+    img_name = os.path.basename(filename)
+
+    size = annotation_root.find('size')
+    width = float(size.findtext('width'))
+    height = float(size.findtext('height'))
+
+    image_info = {
+        'file_name': filename,
+        'height': height,
+        'width': width,
+        'id': im_id
+    }
+    return image_info
+
+
+def voc_get_coco_annotation(obj, label2id):
+    label = obj.findtext('name')
+    assert label in label2id, "label is not in label2id."
+    category_id = label2id[label]
+    bndbox = obj.find('bndbox')
+    xmin = float(bndbox.findtext('xmin'))
+    ymin = float(bndbox.findtext('ymin'))
+    xmax = float(bndbox.findtext('xmax'))
+    ymax = float(bndbox.findtext('ymax'))
+    assert xmax > xmin and ymax > ymin, "Box size error."
+    o_width = xmax - xmin
+    o_height = ymax - ymin
+    anno = {
+        'area': o_width * o_height,
+        'iscrowd': 0,
+        'bbox': [xmin, ymin, o_width, o_height],
+        'category_id': category_id,
+        'ignore': 0,
+    }
+    return anno
+
+
+def voc_xmls_to_cocojson(annotation_paths, label2id, output_dir, output_file):
+    output_json_dict = {
+        "images": [],
+        "type": "instances",
+        "annotations": [],
+        "categories": []
+    }
+    bnd_id = 1  # bounding box start id
+    im_id = 0
+    print('Start converting!')
+    for a_path in tqdm(annotation_paths):
+        # Read annotation xml
+        ann_tree = ET.parse(a_path)
+        ann_root = ann_tree.getroot()
+
+        img_info = voc_get_image_info(ann_root, im_id)
+        output_json_dict['images'].append(img_info)
+
+        for obj in ann_root.findall('object'):
+            ann = voc_get_coco_annotation(obj=obj, label2id=label2id)
+            ann.update({'image_id': im_id, 'id': bnd_id})
+            output_json_dict['annotations'].append(ann)
+            bnd_id = bnd_id + 1
+        im_id += 1
+
+    for label, label_id in label2id.items():
+        category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
+        output_json_dict['categories'].append(category_info)
+    output_file = os.path.join(output_dir, output_file)
+    with open(output_file, 'w') as f:
+        output_json = json.dumps(output_json_dict)
+        f.write(output_json)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        '--dataset_type',
+        help='the type of dataset, can be `voc`, `labelme` or `cityscape`')
+    parser.add_argument('--json_input_dir', help='input annotated directory')
+    parser.add_argument('--image_input_dir', help='image directory')
+    parser.add_argument(
+        '--output_dir', help='output dataset directory', default='./')
+    parser.add_argument(
+        '--train_proportion',
+        help='the proportion of train dataset',
+        type=float,
+        default=1.0)
+    parser.add_argument(
+        '--val_proportion',
+        help='the proportion of validation dataset',
+        type=float,
+        default=0.0)
+    parser.add_argument(
+        '--test_proportion',
+        help='the proportion of test dataset',
+        type=float,
+        default=0.0)
+    parser.add_argument(
+        '--voc_anno_dir',
+        help='In Voc format dataset, path to annotation files directory.',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--voc_anno_list',
+        help='In Voc format dataset, path to annotation files ids list.',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--voc_label_list',
+        help='In Voc format dataset, path to label list. The content of each line is a category.',
+        type=str,
+        default=None)
+    parser.add_argument(
+        '--voc_out_name',
+        type=str,
+        default='voc.json',
+        help='In Voc format dataset, path to output json file')
+    args = parser.parse_args()
+    try:
+        assert args.dataset_type in ['voc', 'labelme', 'cityscape']
+    except AssertionError as e:
+        print('Only voc, cityscape and labelme datasets are supported now!')
+        os._exit(0)
+
+    if args.dataset_type == 'voc':
+        assert args.voc_anno_dir and args.voc_anno_list and args.voc_label_list
+        label2id, ann_paths = voc_get_label_anno(
+            args.voc_anno_dir, args.voc_anno_list, args.voc_label_list)
+        voc_xmls_to_cocojson(
+            annotation_paths=ann_paths,
+            label2id=label2id,
+            output_dir=args.output_dir,
+            output_file=args.voc_out_name)
+    else:
+        try:
+            assert os.path.exists(args.json_input_dir)
+        except AssertionError as e:
+            print('The json folder does not exist!')
+            os._exit(0)
+        try:
+            assert os.path.exists(args.image_input_dir)
+        except AssertionError as e:
+            print('The image folder does not exist!')
+            os._exit(0)
+        try:
+            assert abs(args.train_proportion + args.val_proportion \
+                    + args.test_proportion - 1.0) < 1e-5
+        except AssertionError as e:
+            print(
+                'The sum of proportions of the training, validation and test datasets must be 1!'
+            )
+            os._exit(0)
+
+        # Allocate the dataset.
+        total_num = len(glob.glob(osp.join(args.json_input_dir, '*.json')))
+        if args.train_proportion != 0:
+            train_num = int(total_num * args.train_proportion)
+            out_dir = args.output_dir + '/train'
+            if not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+        else:
+            train_num = 0
+        if args.val_proportion == 0.0:
+            val_num = 0
+            test_num = total_num - train_num
+            out_dir = args.output_dir + '/test'
+            if args.test_proportion != 0.0 and not os.path.exists(out_dir):
+                os.makedirs(out_dir)
+        else:
+            val_num = int(total_num * args.val_proportion)
+            test_num = total_num - train_num - val_num
+            val_out_dir = args.output_dir + '/val'
+            if not os.path.exists(val_out_dir):
+                os.makedirs(val_out_dir)
+            test_out_dir = args.output_dir + '/test'
+            if args.test_proportion != 0.0 and not os.path.exists(test_out_dir):
+                os.makedirs(test_out_dir)
+        count = 1
+        for img_name in os.listdir(args.image_input_dir):
+            if count <= train_num:
+                if osp.exists(args.output_dir + '/train/'):
+                    shutil.copyfile(
+                        osp.join(args.image_input_dir, img_name),
+                        osp.join(args.output_dir + '/train/', img_name))
+            else:
+                if count <= train_num + val_num:
+                    if osp.exists(args.output_dir + '/val/'):
+                        shutil.copyfile(
+                            osp.join(args.image_input_dir, img_name),
+                            osp.join(args.output_dir + '/val/', img_name))
+                else:
+                    if osp.exists(args.output_dir + '/test/'):
+                        shutil.copyfile(
+                            osp.join(args.image_input_dir, img_name),
+                            osp.join(args.output_dir + '/test/', img_name))
+            count = count + 1
+
+        # Deal with the json files.
+        if not os.path.exists(args.output_dir + '/annotations'):
+            os.makedirs(args.output_dir + '/annotations')
+        if args.train_proportion != 0:
+            train_data_coco = deal_json(args.dataset_type,
+                                        args.output_dir + '/train',
+                                        args.json_input_dir)
+            train_json_path = osp.join(args.output_dir + '/annotations',
+                                       'instance_train.json')
+            json.dump(
+                train_data_coco,
+                open(train_json_path, 'w'),
+                indent=4,
+                cls=MyEncoder)
+        if args.val_proportion != 0:
+            val_data_coco = deal_json(args.dataset_type,
+                                      args.output_dir + '/val',
+                                      args.json_input_dir)
+            val_json_path = osp.join(args.output_dir + '/annotations',
+                                     'instance_val.json')
+            json.dump(
+                val_data_coco,
+                open(val_json_path, 'w'),
+                indent=4,
+                cls=MyEncoder)
+        if args.test_proportion != 0:
+            test_data_coco = deal_json(args.dataset_type,
+                                       args.output_dir + '/test',
+                                       args.json_input_dir)
+            test_json_path = osp.join(args.output_dir + '/annotations',
+                                      'instance_test.json')
+            json.dump(
+                test_data_coco,
+                open(test_json_path, 'w'),
+                indent=4,
+                cls=MyEncoder)
+
+
+if __name__ == '__main__':
+    main()
-- 
GitLab
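Usage note: the new tools/x2coco.py is driven entirely by the command-line flags defined in main() above. A minimal sketch of two invocations follows; the input/output paths (./labelme_annos, ./labelme_imgs, ./coco_out, ./VOCdevkit/..., ./label_list.txt) and the 0.8/0.2 split are hypothetical examples, not values required by the tool.

    # labelme -> COCO: copies images into train/ and val/ under --output_dir and
    # writes instance_train.json / instance_val.json under <output_dir>/annotations
    python tools/x2coco.py \
        --dataset_type labelme \
        --json_input_dir ./labelme_annos \
        --image_input_dir ./labelme_imgs \
        --output_dir ./coco_out \
        --train_proportion 0.8 \
        --val_proportion 0.2 \
        --test_proportion 0.0

    # Pascal VOC -> COCO: the proportion flags are ignored on this path; a single
    # JSON file named by --voc_out_name is written into --output_dir
    python tools/x2coco.py \
        --dataset_type voc \
        --voc_anno_dir ./VOCdevkit/VOC2007/Annotations \
        --voc_anno_list ./VOCdevkit/VOC2007/ImageSets/Main/trainval.txt \
        --voc_label_list ./label_list.txt \
        --voc_out_name voc_trainval.json \
        --output_dir ./coco_out

For the labelme/cityscape path, --train_proportion, --val_proportion and --test_proportion must sum to 1.0 and both --json_input_dir and --image_input_dir must exist; otherwise the script prints an error and exits. For the voc path, --voc_anno_dir, --voc_anno_list and --voc_label_list are all required.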