提交 412bf53e 编写于 作者: S SunAhong1993 提交者: GitHub

add x2coco.py (#3788)

* add x2coco.py

* Update DATA.md

* Update DATA_cn.md
上级 1ea40a8d
......@@ -181,16 +181,18 @@ whole data pipeline is fully customizable through the yaml configuration files.
#### Custom Datasets
- Option 1: Convert the dataset to COCO or VOC format.
- Option 1: Convert the dataset to COCO format.
```sh
# a small utility (`tools/labelme2coco.py`) is provided to convert
# Labelme-annotated dataset to COCO format.
python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
# a small utility (`tools/x2coco.py`) is provided to convert
# Labelme-annotated dataset or cityscape dataset to COCO format.
python ./ppdet/data/tools/x2coco.py --dataset_type labelme \
    --json_input_dir ./labelme_annos/ \
    --image_input_dir ./labelme_imgs/ \
    --output_dir ./cocome/ \
    --train_proportion 0.8 \
    --val_proportion 0.2 \
    --test_proportion 0.0
# --dataset_type: The data format that needs to be converted. Currently supported: 'labelme' and 'cityscape'.
# --json_input_dir: The path of the JSON files annotated by Labelme.
# --image_input_dir: The path of the images.
# --output_dir: The path of the converted COCO dataset.
......
......@@ -165,15 +165,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
```
#### 如何使用自定义数据集?
- 选择1:将数据集转换为VOC格式或者COCO格式。
- 选择1:将数据集转换为COCO格式。
```
# 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集
python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
# 在./ppdet/data/tools/中提供了x2coco.py,用于将labelme标注的数据集或cityscape数据集转换为COCO数据集
python ./ppdet/data/tools/x2coco.py --dataset_type labelme \
    --json_input_dir ./labelme_annos/ \
    --image_input_dir ./labelme_imgs/ \
    --output_dir ./cocome/ \
    --train_proportion 0.8 \
    --val_proportion 0.2 \
    --test_proportion 0.0
# --dataset_type:需要转换的数据格式,目前支持:'labelme'和'cityscape'
# --json_input_dir:使用labelme标注的json文件所在文件夹
# --image_input_dir:图像文件所在文件夹
# --output_dir:转换后的COCO格式数据集存放位置
......
......@@ -44,7 +44,7 @@ def getbbox(self, points):
return self.mask2box(mask)
def images(data, num):
def images_labelme(data, num):
image = {}
image['height'] = data['imageHeight']
image['width'] = data['imageWidth']
......@@ -52,6 +52,14 @@ def images(data, num):
image['file_name'] = data['imagePath'].split('/')[-1]
return image
def images_cityscape(data, num, img_file):
    """Build the image record for one cityscape-style annotation file.

    Args:
        data: Parsed annotation JSON; its 'imgHeight' and 'imgWidth'
            entries are copied into the record.
        num: Zero-based index of the image; the record id is ``num + 1``.
        img_file: File name stored under 'file_name' as given.

    Returns:
        A dict with the keys 'height', 'width', 'id' and 'file_name'.

    Raises:
        KeyError: If ``data`` lacks 'imgHeight' or 'imgWidth'.
    """
    # Key order (and therefore lookup order on ``data``) mirrors the
    # field-by-field construction used for the labelme variant.
    return {
        'height': data['imgHeight'],
        'width': data['imgWidth'],
        'id': num + 1,
        'file_name': img_file,
    }
def categories(label, labels_list):
category = {}
......@@ -112,7 +120,7 @@ def get_bbox(height, width, points):
]
def deal_json(img_path, json_path):
def deal_json(ds_type, img_path, json_path):
data_coco = {}
label_to_num = {}
images_list = []
......@@ -120,34 +128,52 @@ def deal_json(img_path, json_path):
annotations_list = []
labels_list = []
image_num = -1
object_num = -1
for img_file in os.listdir(img_path):
img_label = img_file.split('.')[0]
if img_file.split('.')[-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
continue
label_file = osp.join(json_path, img_label + '.json')
print('Generating dataset from:', label_file)
image_num = image_num + 1
with open(label_file) as f:
data = json.load(f)
images_list.append(images(data, image_num))
object_num = -1
for shapes in data['shapes']:
object_num = object_num + 1
label = shapes['label']
if label not in labels_list:
categories_list.append(categories(label, labels_list))
labels_list.append(label)
label_to_num[label] = len(labels_list)
points = shapes['points']
p_type = shapes['shape_type']
if p_type == 'polygon':
annotations_list.append(
annotations_polygon(data['imageHeight'], data[
'imageWidth'], points, label, image_num, object_num, label_to_num))
if ds_type == 'labelme':
images_list.append(images_labelme(data, image_num))
elif ds_type == 'cityscape':
images_list.append(images_cityscape(data, image_num, img_file))
if ds_type == 'labelme':
for shapes in data['shapes']:
object_num = object_num + 1
label = shapes['label']
if label not in labels_list:
categories_list.append(categories(label, labels_list))
labels_list.append(label)
label_to_num[label] = len(labels_list)
points = shapes['points']
p_type = shapes['shape_type']
if p_type == 'polygon':
annotations_list.append(
annotations_polygon(data['imageHeight'], data[
'imageWidth'], points, label, image_num, object_num, label_to_num))
if p_type == 'rectangle':
points.append([points[0][0], points[1][1]])
points.append([points[1][0], points[0][1]])
if p_type == 'rectangle':
points.append([points[0][0], points[1][1]])
points.append([points[1][0], points[0][1]])
annotations_list.append(
annotations_rectangle(points, label, image_num, object_num, label_to_num))
elif ds_type == 'cityscape':
for shapes in data['objects']:
object_num = object_num + 1
label = shapes['label']
if label not in labels_list:
categories_list.append(categories(label, labels_list))
labels_list.append(label)
label_to_num[label] = len(labels_list)
points = shapes['polygon']
annotations_list.append(
annotations_rectangle(points, label, image_num, object_num, label_to_num))
annotations_polygon(data['imgHeight'], data[
'imgWidth'], points, label, image_num, object_num, label_to_num))
data_coco['images'] = images_list
data_coco['categories'] = categories_list
data_coco['annotations'] = annotations_list
......@@ -157,6 +183,7 @@ def deal_json(img_path, json_path):
def main():
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--dataset_type', help='the type of dataset')
parser.add_argument('--json_input_dir', help='input annotated directory')
parser.add_argument('--image_input_dir', help='image directory')
parser.add_argument(
......@@ -177,6 +204,11 @@ def main():
type=float,
default=0.0)
args = parser.parse_args()
try:
assert args.dataset_type in ['labelme', 'cityscape']
except AssertionError as e:
print('Now only support the cityscape dataset and labelme dataset!!')
os._exit(0)
try:
assert os.path.exists(args.json_input_dir)
except AssertionError as e:
......@@ -234,7 +266,8 @@ def main():
if not os.path.exists(args.output_dir + '/annotations'):
os.makedirs(args.output_dir + '/annotations')
if args.train_proportion != 0:
train_data_coco = deal_json(args.output_dir + '/train',
train_data_coco = deal_json(args.dataset_type,
args.output_dir + '/train',
args.json_input_dir)
train_json_path = osp.join(args.output_dir + '/annotations',
'instance_train.json')
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册