From 7bb8630ed50b961a4fcfcb342bfc4fedc488e2bc Mon Sep 17 00:00:00 2001
From: SunAhong1993 <48579383+SunAhong1993@users.noreply.github.com>
Date: Mon, 28 Oct 2019 16:51:38 +0800
Subject: [PATCH] add x2coco.py (#3788)

* add x2coco.py

* Update DATA.md

* Update DATA_cn.md
---
 PaddleCV/PaddleDetection/docs/DATA.md         | 10 ++-
 PaddleCV/PaddleDetection/docs/DATA_cn.md      |  8 +-
 .../data/tools/{labelme2coco.py => x2coco.py} | 77 +++++++++++++------
 3 files changed, 66 insertions(+), 29 deletions(-)
 rename PaddleCV/PaddleDetection/ppdet/data/tools/{labelme2coco.py => x2coco.py} (74%)

diff --git a/PaddleCV/PaddleDetection/docs/DATA.md b/PaddleCV/PaddleDetection/docs/DATA.md
index be9048c0..ac2244f4 100644
--- a/PaddleCV/PaddleDetection/docs/DATA.md
+++ b/PaddleCV/PaddleDetection/docs/DATA.md
@@ -181,16 +181,18 @@ whole data pipeline is fully customizable through the yaml configuration files.
 
 #### Custom Datasets
 
-- Option 1: Convert the dataset to COCO or VOC format.
+- Option 1: Convert the dataset to COCO format.
   ```sh
-  # a small utility (`tools/labelme2coco.py`) is provided to convert
-  # Labelme-annotated dataset to COCO format.
-  python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+  # a small utility (`tools/x2coco.py`) is provided to convert
+  # Labelme-annotated dataset or cityscape dataset to COCO format.
+  python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                      --json_input_dir ./labelme_annos/
                                       --image_input_dir ./labelme_imgs/
                                       --output_dir ./cocome/
                                       --train_proportion 0.8
                                       --val_proportion 0.2
                                       --test_proportion 0.0
+  # --dataset_type: The data format that needs to be converted. Currently supported: 'labelme' and 'cityscape'
   # --json_input_dir:The path of json files which are annotated by Labelme.
   # --image_input_dir:The path of images.
   # --output_dir:The path of coverted COCO dataset.
diff --git a/PaddleCV/PaddleDetection/docs/DATA_cn.md b/PaddleCV/PaddleDetection/docs/DATA_cn.md
index 57970169..e34ba8e8 100644
--- a/PaddleCV/PaddleDetection/docs/DATA_cn.md
+++ b/PaddleCV/PaddleDetection/docs/DATA_cn.md
@@ -165,15 +165,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
 ```
 #### 如何使用自定义数据集？
 
-- 选择1：将数据集转换为VOC格式或者COCO格式。
+- 选择1：将数据集转换为COCO格式。
   ```
-  # 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集
-  python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+  # 在./tools/中提供了x2coco.py用于将labelme标注的数据集或cityscape数据集转换为COCO数据集
+  python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                      --json_input_dir ./labelme_annos/
                                       --image_input_dir ./labelme_imgs/
                                       --output_dir ./cocome/
                                       --train_proportion 0.8
                                       --val_proportion 0.2
                                       --test_proportion 0.0
+  # --dataset_type：需要转换的数据格式，目前支持：'labelme'和'cityscape'
   # --json_input_dir：使用labelme标注的json文件所在文件夹
   # --image_input_dir：图像文件所在文件夹
   # --output_dir：转换后的COCO格式数据集存放位置
diff --git a/PaddleCV/PaddleDetection/ppdet/data/tools/labelme2coco.py b/PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
similarity index 74%
rename from PaddleCV/PaddleDetection/ppdet/data/tools/labelme2coco.py
rename to PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
index 4cacd064..da8e4aef 100644
--- a/PaddleCV/PaddleDetection/ppdet/data/tools/labelme2coco.py
+++ b/PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
@@ -44,7 +44,7 @@ def getbbox(self, points):
         return self.mask2box(mask)
 
 
-def images(data, num):
+def images_labelme(data, num):
     image = {}
     image['height'] = data['imageHeight']
     image['width'] = data['imageWidth']
@@ -52,6 +52,14 @@ def images(data, num):
     image['file_name'] = data['imagePath'].split('/')[-1]
     return image
 
+def images_cityscape(data, num, img_file):
+    image = {}
+    image['height'] = data['imgHeight']
+    image['width'] = data['imgWidth']
+    image['id'] = num + 1
+    image['file_name'] = img_file
+    return image
+
 
 def categories(label, labels_list):
     category = {}
@@ -112,7 +120,7 @@ def get_bbox(height, width, points):
     ]
 
 
-def deal_json(img_path, json_path):
+def deal_json(ds_type, img_path, json_path):
     data_coco = {}
     label_to_num = {}
     images_list = []
@@ -120,34 +128,52 @@ def deal_json(img_path, json_path):
     annotations_list = []
     labels_list = []
     image_num = -1
+    object_num = -1
     for img_file in os.listdir(img_path):
         img_label = img_file.split('.')[0]
+        if img_file.split('.')[-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
+            continue
         label_file = osp.join(json_path, img_label + '.json')
         print('Generating dataset from:', label_file)
         image_num = image_num + 1
         with open(label_file) as f:
             data = json.load(f)
-            images_list.append(images(data, image_num))
-            object_num = -1
-            for shapes in data['shapes']:
-                object_num = object_num + 1
-                label = shapes['label']
-                if label not in labels_list:
-                    categories_list.append(categories(label, labels_list))
-                    labels_list.append(label)
-                    label_to_num[label] = len(labels_list)
-                points = shapes['points']
-                p_type = shapes['shape_type']
-                if p_type == 'polygon':
-                    annotations_list.append(
-                        annotations_polygon(data['imageHeight'], data[
-                            'imageWidth'], points, label, image_num, object_num, label_to_num))
+            if ds_type == 'labelme':
+                images_list.append(images_labelme(data, image_num))
+            elif ds_type == 'cityscape':
+                images_list.append(images_cityscape(data, image_num, img_file))
+            if ds_type == 'labelme':
+                for shapes in data['shapes']:
+                    object_num = object_num + 1
+                    label = shapes['label']
+                    if label not in labels_list:
+                        categories_list.append(categories(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    points = shapes['points']
+                    p_type = shapes['shape_type']
+                    if p_type == 'polygon':
+                        annotations_list.append(
+                            annotations_polygon(data['imageHeight'], data[
+                                'imageWidth'], points, label, image_num, object_num, label_to_num))
 
-                if p_type == 'rectangle':
-                    points.append([points[0][0], points[1][1]])
-                    points.append([points[1][0], points[0][1]])
+                    if p_type == 'rectangle':
+                        points.append([points[0][0], points[1][1]])
+                        points.append([points[1][0], points[0][1]])
+                        annotations_list.append(
+                            annotations_rectangle(points, label, image_num, object_num, label_to_num))
+            elif ds_type == 'cityscape':
+                for shapes in data['objects']:
+                    object_num = object_num + 1
+                    label = shapes['label']
+                    if label not in labels_list:
+                        categories_list.append(categories(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    points = shapes['polygon']
                     annotations_list.append(
-                        annotations_rectangle(points, label, image_num, object_num, label_to_num))
+                        annotations_polygon(data['imgHeight'], data[
+                            'imgWidth'], points, label, image_num, object_num, label_to_num))
     data_coco['images'] = images_list
     data_coco['categories'] = categories_list
     data_coco['annotations'] = annotations_list
@@ -157,6 +183,7 @@ def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--dataset_type', help='the type of dataset')
     parser.add_argument('--json_input_dir', help='input annotated directory')
     parser.add_argument('--image_input_dir', help='image directory')
     parser.add_argument(
@@ -177,6 +204,11 @@ def main():
         type=float,
         default=0.0)
     args = parser.parse_args()
+    try:
+        assert args.dataset_type in ['labelme', 'cityscape']
+    except AssertionError as e:
+        print('Only the cityscape dataset and labelme dataset are supported now!')
+        os._exit(0)
     try:
         assert os.path.exists(args.json_input_dir)
     except AssertionError as e:
@@ -234,7 +266,8 @@ def main():
     if not os.path.exists(args.output_dir + '/annotations'):
         os.makedirs(args.output_dir + '/annotations')
     if args.train_proportion != 0:
-        train_data_coco = deal_json(args.output_dir + '/train',
+        train_data_coco = deal_json(args.dataset_type,
+                                    args.output_dir + '/train',
                                     args.json_input_dir)
        train_json_path = osp.join(args.output_dir + '/annotations',
                                   'instance_train.json')
--
GitLab
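
A minimal sketch of what the new cityscape branch of `deal_json()` assembles for one Cityscapes-style `*_polygons.json` record (invoked with `--dataset_type cityscape` instead of `labelme`). The image fields mirror `images_cityscape()` above; the annotation fields (`bbox`, `segmentation`, `area`, `category_id`, `iscrowd`) follow the usual COCO layout and are assumptions here, since `annotations_polygon()` is outside this diff. The helper and the sample file name below are illustrative, not part of the patch.

```python
# Illustrative helper only: approximates what the cityscape branch of
# deal_json() produces per record, with COCO-style annotation fields assumed.
import json


def cityscape_record_to_coco(data, img_num, img_file, label_to_num):
    """Map one loaded Cityscapes polygon JSON dict to COCO-style dicts."""
    # Image entry: same fields as images_cityscape() in the patch.
    image = {
        'height': data['imgHeight'],
        'width': data['imgWidth'],
        'id': img_num + 1,
        'file_name': img_file,
    }
    annotations = []
    for obj_num, obj in enumerate(data['objects']):
        label = obj['label']
        # First occurrence of a label gets the next category id, as in deal_json().
        label_to_num.setdefault(label, len(label_to_num) + 1)
        xs = [p[0] for p in obj['polygon']]
        ys = [p[1] for p in obj['polygon']]
        x_min, y_min = min(xs), min(ys)
        box_w, box_h = max(xs) - x_min, max(ys) - y_min
        annotations.append({
            'id': obj_num + 1,
            'image_id': img_num + 1,
            'category_id': label_to_num[label],
            # COCO stores each polygon as a flat [x1, y1, x2, y2, ...] list.
            'segmentation': [[coord for pt in obj['polygon'] for coord in pt]],
            'bbox': [x_min, y_min, box_w, box_h],
            'area': box_w * box_h,  # bounding-box area, as a simplification
            'iscrowd': 0,
        })
    return image, annotations


if __name__ == '__main__':
    # Toy record in the format the cityscape branch of deal_json() reads.
    record = {
        'imgHeight': 1024,
        'imgWidth': 2048,
        'objects': [{'label': 'car',
                     'polygon': [[10, 20], [110, 20], [110, 80], [10, 80]]}],
    }
    image, anns = cityscape_record_to_coco(
        record, 0, 'berlin_000000_000019_leftImg8bit.png', {})
    print(json.dumps({'images': [image], 'annotations': anns}, indent=2))
```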