From 7bb8630ed50b961a4fcfcb342bfc4fedc488e2bc Mon Sep 17 00:00:00 2001
From: SunAhong1993 <48579383+SunAhong1993@users.noreply.github.com>
Date: Mon, 28 Oct 2019 16:51:38 +0800
Subject: [PATCH] add x2coco.py (#3788)

* add x2coco.py

* Update DATA.md

* Update DATA_cn.md
---
 PaddleCV/PaddleDetection/docs/DATA.md         | 10 ++-
 PaddleCV/PaddleDetection/docs/DATA_cn.md      |  8 +-
 .../data/tools/{labelme2coco.py => x2coco.py} | 77 +++++++++++++------
 3 files changed, 66 insertions(+), 29 deletions(-)
 rename PaddleCV/PaddleDetection/ppdet/data/tools/{labelme2coco.py => x2coco.py} (74%)

diff --git a/PaddleCV/PaddleDetection/docs/DATA.md b/PaddleCV/PaddleDetection/docs/DATA.md
index be9048c0..ac2244f4 100644
--- a/PaddleCV/PaddleDetection/docs/DATA.md
+++ b/PaddleCV/PaddleDetection/docs/DATA.md
@@ -181,16 +181,18 @@ whole data pipeline is fully customizable through the yaml configuration files.
 
 #### Custom Datasets
 
-- Option 1: Convert the dataset to COCO or VOC format.
+- Option 1: Convert the dataset to COCO format.
   ```sh
-  # a small utility (`tools/labelme2coco.py`) is provided to convert
-  # Labelme-annotated dataset to COCO format.
-  python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+  # a small utility (`tools/x2coco.py`) is provided to convert
+  # Labelme-annotated dataset or cityscape dataset to COCO format.
+  python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                      --json_input_dir ./labelme_annos/
                                       --image_input_dir ./labelme_imgs/
                                       --output_dir ./cocome/
                                       --train_proportion 0.8
                                       --val_proportion 0.2
                                       --test_proportion 0.0
+  # --dataset_type: The data format that needs to be converted. Currently supported: 'labelme' and 'cityscape'
   # --json_input_dir:The path of json files which are annotated by Labelme.
   # --image_input_dir:The path of images.
   # --output_dir:The path of coverted COCO dataset.
diff --git a/PaddleCV/PaddleDetection/docs/DATA_cn.md b/PaddleCV/PaddleDetection/docs/DATA_cn.md
index 57970169..e34ba8e8 100644
--- a/PaddleCV/PaddleDetection/docs/DATA_cn.md
+++ b/PaddleCV/PaddleDetection/docs/DATA_cn.md
@@ -165,15 +165,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
 ```
 #### 如何使用自定义数据集？
 
-- 选择1：将数据集转换为VOC格式或者COCO格式。
+- 选择1：将数据集转换为COCO格式。
   ```
-  # 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集
-  python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+  # 在./tools/中提供了x2coco.py用于将labelme标注的数据集或cityscape数据集转换为COCO数据集
+  python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                      --json_input_dir ./labelme_annos/
                                       --image_input_dir ./labelme_imgs/
                                       --output_dir ./cocome/
                                       --train_proportion 0.8
                                       --val_proportion 0.2
                                       --test_proportion 0.0
+  # --dataset_type：需要转换的数据格式，目前支持：'labelme'和'cityscape'
   # --json_input_dir：使用labelme标注的json文件所在文件夹
   # --image_input_dir：图像文件所在文件夹
   # --output_dir：转换后的COCO格式数据集存放位置
diff --git a/PaddleCV/PaddleDetection/ppdet/data/tools/labelme2coco.py b/PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
similarity index 74%
rename from PaddleCV/PaddleDetection/ppdet/data/tools/labelme2coco.py
rename to PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
index 4cacd064..da8e4aef 100644
--- a/PaddleCV/PaddleDetection/ppdet/data/tools/labelme2coco.py
+++ b/PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
@@ -44,7 +44,7 @@ def getbbox(self, points):
         return self.mask2box(mask)
 
 
-def images(data, num):
+def images_labelme(data, num):
     image = {}
     image['height'] = data['imageHeight']
     image['width'] = data['imageWidth']
@@ -52,6 +52,14 @@ def images(data, num):
     image['file_name'] = data['imagePath'].split('/')[-1]
     return image
 
+def images_cityscape(data, num, img_file):
+    image = {}
+    image['height'] = data['imgHeight']
+    image['width'] = data['imgWidth']
+    image['id'] = num + 1
+    image['file_name'] = img_file
+    return image
+
 
 def categories(label, labels_list):
     category = {}
@@ -112,7 +120,7 @@ def get_bbox(height, width, points):
     ]
 
 
-def deal_json(img_path, json_path):
+def deal_json(ds_type, img_path, json_path):
     data_coco = {}
     label_to_num = {}
     images_list = []
@@ -120,34 +128,52 @@ def deal_json(img_path, json_path):
     annotations_list = []
     labels_list = []
     image_num = -1
+    object_num = -1
     for img_file in os.listdir(img_path):
         img_label = img_file.split('.')[0]
+        if img_file.split('.')[-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
+            continue
         label_file = osp.join(json_path, img_label + '.json')
         print('Generating dataset from:', label_file)
         image_num = image_num + 1
         with open(label_file) as f:
             data = json.load(f)
-            images_list.append(images(data, image_num))
-            object_num = -1
-            for shapes in data['shapes']:
-                object_num = object_num + 1
-                label = shapes['label']
-                if label not in labels_list:
-                    categories_list.append(categories(label, labels_list))
-                    labels_list.append(label)
-                    label_to_num[label] = len(labels_list)
-                points = shapes['points']
-                p_type = shapes['shape_type']
-                if p_type == 'polygon':
-                    annotations_list.append(
-                        annotations_polygon(data['imageHeight'], data[
-                            'imageWidth'], points, label, image_num, object_num, label_to_num))
+            if ds_type == 'labelme':
+                images_list.append(images_labelme(data, image_num))
+            elif ds_type == 'cityscape':
+                images_list.append(images_cityscape(data, image_num, img_file))
+            if ds_type == 'labelme':
+                for shapes in data['shapes']:
+                    object_num = object_num + 1
+                    label = shapes['label']
+                    if label not in labels_list:
+                        categories_list.append(categories(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    points = shapes['points']
+                    p_type = shapes['shape_type']
+                    if p_type == 'polygon':
+                        annotations_list.append(
+                            annotations_polygon(data['imageHeight'], data[
+                                'imageWidth'], points, label, image_num, object_num, label_to_num))
 
-                if p_type == 'rectangle':
-                    points.append([points[0][0], points[1][1]])
-                    points.append([points[1][0], points[0][1]])
+                    if p_type == 'rectangle':
+                        points.append([points[0][0], points[1][1]])
+                        points.append([points[1][0], points[0][1]])
+                        annotations_list.append(
+                            annotations_rectangle(points, label, image_num, object_num, label_to_num))
+            elif ds_type == 'cityscape':
+                for shapes in data['objects']:
+                    object_num = object_num + 1
+                    label = shapes['label']
+                    if label not in labels_list:
+                        categories_list.append(categories(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    points = shapes['polygon']
                     annotations_list.append(
-                        annotations_rectangle(points, label, image_num, object_num, label_to_num))
+                        annotations_polygon(data['imgHeight'], data[
+                            'imgWidth'], points, label, image_num, object_num, label_to_num))
     data_coco['images'] = images_list
     data_coco['categories'] = categories_list
     data_coco['annotations'] = annotations_list
@@ -157,6 +183,7 @@ def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--dataset_type', help='the type of dataset')
     parser.add_argument('--json_input_dir', help='input annotated directory')
     parser.add_argument('--image_input_dir', help='image directory')
     parser.add_argument(
@@ -177,6 +204,11 @@ def main():
         type=float,
         default=0.0)
     args = parser.parse_args()
+    try:
+        assert args.dataset_type in ['labelme', 'cityscape']
+    except AssertionError as e:
+        print('Only the cityscape dataset and labelme dataset are supported now!')
+        os._exit(0)
     try:
         assert os.path.exists(args.json_input_dir)
     except AssertionError as e:
@@ -234,7 +266,8 @@ def main():
     if not os.path.exists(args.output_dir + '/annotations'):
         os.makedirs(args.output_dir + '/annotations')
     if args.train_proportion != 0:
-        train_data_coco = deal_json(args.output_dir + '/train',
+        train_data_coco = deal_json(args.dataset_type,
+                                    args.output_dir + '/train',
                                     args.json_input_dir)
        train_json_path = osp.join(args.output_dir + '/annotations',
                                   'instance_train.json')
--
GitLab
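
A minimal sketch of what the new cityscape branch of `deal_json()` assembles for one Cityscapes-style `*_polygons.json` record (invoked with `--dataset_type cityscape` instead of `labelme`). The image fields mirror `images_cityscape()` above; the annotation fields (`bbox`, `segmentation`, `area`, `category_id`, `iscrowd`) follow the usual COCO layout and are assumptions here, since `annotations_polygon()` is outside this diff. The helper and the sample file name below are illustrative, not part of the patch.

```python
# Illustrative helper only: approximates what the cityscape branch of
# deal_json() produces per record, with COCO-style annotation fields assumed.
import json


def cityscape_record_to_coco(data, img_num, img_file, label_to_num):
    """Map one loaded Cityscapes polygon JSON dict to COCO-style dicts."""
    # Image entry: same fields as images_cityscape() in the patch.
    image = {
        'height': data['imgHeight'],
        'width': data['imgWidth'],
        'id': img_num + 1,
        'file_name': img_file,
    }
    annotations = []
    for obj_num, obj in enumerate(data['objects']):
        label = obj['label']
        # First occurrence of a label gets the next category id, as in deal_json().
        label_to_num.setdefault(label, len(label_to_num) + 1)
        xs = [p[0] for p in obj['polygon']]
        ys = [p[1] for p in obj['polygon']]
        x_min, y_min = min(xs), min(ys)
        box_w, box_h = max(xs) - x_min, max(ys) - y_min
        annotations.append({
            'id': obj_num + 1,
            'image_id': img_num + 1,
            'category_id': label_to_num[label],
            # COCO stores each polygon as a flat [x1, y1, x2, y2, ...] list.
            'segmentation': [[coord for pt in obj['polygon'] for coord in pt]],
            'bbox': [x_min, y_min, box_w, box_h],
            'area': box_w * box_h,  # bounding-box area, as a simplification
            'iscrowd': 0,
        })
    return image, annotations


if __name__ == '__main__':
    # Toy record in the format the cityscape branch of deal_json() reads.
    record = {
        'imgHeight': 1024,
        'imgWidth': 2048,
        'objects': [{'label': 'car',
                     'polygon': [[10, 20], [110, 20], [110, 80], [10, 80]]}],
    }
    image, anns = cityscape_record_to_coco(
        record, 0, 'berlin_000000_000019_leftImg8bit.png', {})
    print(json.dumps({'images': [image], 'annotations': anns}, indent=2))
```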