Commit 412bf53e authored by SunAhong1993, committed by GitHub

add x2coco.py (#3788)

* add x2coco.py

* Update DATA.md

* Update DATA_cn.md
Parent 1ea40a8d
@@ -181,16 +181,18 @@ whole data pipeline is fully customizable through the yaml configuration files.
 #### Custom Datasets
-- Option 1: Convert the dataset to COCO or VOC format.
+- Option 1: Convert the dataset to COCO format.
 ```sh
-# a small utility (`tools/labelme2coco.py`) is provided to convert
-# Labelme-annotated dataset to COCO format.
-python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+# a small utility (`tools/x2coco.py`) is provided to convert
+# Labelme-annotated datasets or Cityscapes datasets to COCO format.
+python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                    --json_input_dir ./labelme_annos/
                                     --image_input_dir ./labelme_imgs/
                                     --output_dir ./cocome/
                                     --train_proportion 0.8
                                     --val_proportion 0.2
                                     --test_proportion 0.0
+# --dataset_type: The data format to be converted. Currently 'labelme' and 'cityscape' are supported.
 # --json_input_dir: The path of the JSON files annotated with Labelme.
 # --image_input_dir: The path of the images.
 # --output_dir: The path of the converted COCO dataset.
......
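After conversion, the output under `./cocome/` can be sanity-checked with the COCO API. The sketch below is not part of this commit; it assumes `pycocotools` is installed and uses the `annotations/instance_train.json` path that the script's `main()` writes (see the last hunk further down).

```python
# Minimal sanity check of the converted dataset (illustrative, not part of the commit).
from pycocotools.coco import COCO

coco = COCO('./cocome/annotations/instance_train.json')  # path from the example above
print('images:', len(coco.getImgIds()))
print('categories:', [c['name'] for c in coco.loadCats(coco.getCatIds())])
print('annotations:', len(coco.getAnnIds()))
```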
@@ -165,15 +165,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
 ```
 #### 如何使用自定义数据集?
-- 选择1:将数据集转换为VOC格式或者COCO格式。
+- 选择1:将数据集转换为COCO格式。
 ```
-# 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集
-python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+# 在./tools/中提供了x2coco.py用于将labelme标注的数据集或cityscape数据集转换为COCO数据集
+python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                    --json_input_dir ./labelme_annos/
                                     --image_input_dir ./labelme_imgs/
                                     --output_dir ./cocome/
                                     --train_proportion 0.8
                                     --val_proportion 0.2
                                     --test_proportion 0.0
+# --dataset_type:需要转换的数据格式,目前支持:'labelme'和'cityscape'
 # --json_input_dir:使用labelme标注的json文件所在文件夹
 # --image_input_dir:图像文件所在文件夹
 # --output_dir:转换后的COCO格式数据集存放位置
......
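The `--train_proportion`/`--val_proportion`/`--test_proportion` flags decide how the annotated images are partitioned before conversion. Purely as an illustration of proportional splitting (this is not the script's actual implementation), such a split could look like:

```python
# Illustrative only: one way to split files by train/val/test proportions.
# Not taken from x2coco.py.
import random


def split_files(files, train_p=0.8, val_p=0.2, test_p=0.0, seed=0):
    files = sorted(files)
    random.Random(seed).shuffle(files)    # deterministic shuffle for reproducibility
    n_train = int(len(files) * train_p)
    n_val = int(len(files) * val_p)
    train = files[:n_train]
    val = files[n_train:n_train + n_val]
    test = files[n_train + n_val:]        # the remainder (test_p of the set)
    return train, val, test
```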
@@ -44,7 +44,7 @@ def getbbox(self, points):
         return self.mask2box(mask)
-def images(data, num):
+def images_labelme(data, num):
     image = {}
     image['height'] = data['imageHeight']
     image['width'] = data['imageWidth']
@@ -52,6 +52,14 @@ def images(data, num):
     image['file_name'] = data['imagePath'].split('/')[-1]
     return image
+def images_cityscape(data, num, img_file):
+    image = {}
+    image['height'] = data['imgHeight']
+    image['width'] = data['imgWidth']
+    image['id'] = num + 1
+    image['file_name'] = img_file
+    return image
 def categories(label, labels_list):
     category = {}
@@ -112,7 +120,7 @@ def get_bbox(height, width, points):
     ]
-def deal_json(img_path, json_path):
+def deal_json(ds_type, img_path, json_path):
     data_coco = {}
     label_to_num = {}
     images_list = []
@@ -120,15 +128,21 @@ def deal_json(img_path, json_path):
     annotations_list = []
     labels_list = []
     image_num = -1
+    object_num = -1
     for img_file in os.listdir(img_path):
         img_label = img_file.split('.')[0]
+        if img_file.split('.')[-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
+            continue
         label_file = osp.join(json_path, img_label + '.json')
         print('Generating dataset from:', label_file)
         image_num = image_num + 1
         with open(label_file) as f:
             data = json.load(f)
-            images_list.append(images(data, image_num))
-            object_num = -1
+            if ds_type == 'labelme':
+                images_list.append(images_labelme(data, image_num))
+            elif ds_type == 'cityscape':
+                images_list.append(images_cityscape(data, image_num, img_file))
+        if ds_type == 'labelme':
             for shapes in data['shapes']:
                 object_num = object_num + 1
                 label = shapes['label']
@@ -148,6 +162,18 @@ def deal_json(img_path, json_path):
                     points.append([points[1][0], points[0][1]])
                     annotations_list.append(
                         annotations_rectangle(points, label, image_num, object_num, label_to_num))
+        elif ds_type == 'cityscape':
+            for shapes in data['objects']:
+                object_num = object_num + 1
+                label = shapes['label']
+                if label not in labels_list:
+                    categories_list.append(categories(label, labels_list))
+                    labels_list.append(label)
+                    label_to_num[label] = len(labels_list)
+                points = shapes['polygon']
+                annotations_list.append(
+                    annotations_polygon(data['imgHeight'], data[
+                        'imgWidth'], points, label, image_num, object_num, label_to_num))
     data_coco['images'] = images_list
     data_coco['categories'] = categories_list
     data_coco['annotations'] = annotations_list
@@ -157,6 +183,7 @@ def deal_json(img_path, json_path):
 def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--dataset_type', help='the type of dataset')
     parser.add_argument('--json_input_dir', help='input annotated directory')
     parser.add_argument('--image_input_dir', help='image directory')
     parser.add_argument(
@@ -177,6 +204,11 @@ def main():
         type=float,
         default=0.0)
     args = parser.parse_args()
+    try:
+        assert args.dataset_type in ['labelme', 'cityscape']
+    except AssertionError as e:
+        print('Now only support the cityscape dataset and labelme dataset!!')
+        os._exit(0)
     try:
         assert os.path.exists(args.json_input_dir)
     except AssertionError as e:
@@ -234,7 +266,8 @@ def main():
     if not os.path.exists(args.output_dir + '/annotations'):
         os.makedirs(args.output_dir + '/annotations')
     if args.train_proportion != 0:
-        train_data_coco = deal_json(args.output_dir + '/train',
+        train_data_coco = deal_json(args.dataset_type,
+                                    args.output_dir + '/train',
                                     args.json_input_dir)
         train_json_path = osp.join(args.output_dir + '/annotations',
                                    'instance_train.json')
......
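For reference, the two per-image annotation layouts that the new `ds_type` dispatch in `deal_json()` distinguishes look roughly like the records below. The field names (`imageHeight`/`imagePath`/`shapes`/`points` versus `imgHeight`/`objects`/`polygon`) are the ones read by `images_labelme()` and `images_cityscape()` in the diff above; the concrete values are invented for illustration.

```python
# Illustrative records only; values are invented, field names follow the diff above.
labelme_record = {
    'imageHeight': 768,
    'imageWidth': 1024,
    'imagePath': 'imgs/0001.jpg',
    'shapes': [
        {'label': 'person', 'points': [[100, 120], [300, 420]]},
    ],
}

cityscape_record = {
    'imgHeight': 1024,
    'imgWidth': 2048,
    'objects': [
        {'label': 'car', 'polygon': [[10, 700], [400, 700], [400, 900], [10, 900]]},
    ],
}
```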