Commit e2eccbac authored by Kentaro Wada

Convert bounding box annotated files to VOC-like dataset

Parent cd504642
......@@ -80,6 +80,7 @@ script:
- labelme --help
- labelme --version
- (cd examples/tutorial && rm -rf apc2016_obj3_json && labelme_json_to_dataset apc2016_obj3.json && python load_label_png.py && git checkout -- .)
- (cd examples/bbox_detection && rm -rf data_dataset_voc && ./labelme2voc.py labels.txt data_annotated data_dataset_voc && git checkout -- .)
- (cd examples/semantic_segmentation && rm -rf data_dataset_voc && ./labelme2voc.py labels.txt data_annotated data_dataset_voc && git checkout -- .)
- (cd examples/instance_segmentation && rm -rf data_dataset_voc && ./labelme2voc.py labels.txt data_annotated data_dataset_voc && git checkout -- .)
- (cd examples/video_annotation && rm -rf data_dataset_voc && ./labelme2voc.py labels.txt data_annotated data_dataset_voc && git checkout -- .)
......
......@@ -8,3 +8,18 @@ labelme data_annotated --labels labels.txt --nodata --autosave
```
![](.readme/annotation.jpg)
## Convert to VOC-like Dataset
```bash
# It generates:
# - data_dataset_voc/JPEGImages
# - data_dataset_voc/Annotations
# - data_dataset_voc/AnnotationsVisualization
./labelme2voc.py labels.txt data_annotated data_dataset_voc
```
<img src="data_dataset_voc/JPEGImages/2011_000003.jpg" width="33%" /> <img src="data_dataset_voc/AnnotationsVisualization/2011_000003.jpg" width="33%" />
<i>Fig1. JPEG image (left), Bounding box annotation visualization (right).</i>
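For reference only (not part of this commit): a minimal sketch of reading one of the generated annotation files back with `lxml`. The path is an assumption based on the `data_dataset_voc/Annotations` layout produced above, and the expected output mirrors the first annotation file shown below.

```python
# Minimal sketch: parse one generated VOC-style annotation and print its boxes.
# Assumes labelme2voc.py has already produced data_dataset_voc/ as shown above.
import lxml.etree

tree = lxml.etree.parse('data_dataset_voc/Annotations/2011_000003.xml')
for obj in tree.findall('object'):
    name = obj.find('name').text
    bndbox = obj.find('bndbox')
    xmin, ymin, xmax, ymax = (
        int(float(bndbox.find(tag).text))
        for tag in ('xmin', 'ymin', 'xmax', 'ymax')
    )
    print(name, (xmin, ymin, xmax, ymax))
# e.g. person (191, 107, 313, 329)
#      person (365, 83, 500, 333)
```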
<annotation>
  <folder/>
  <filename>2011_000003.jpg</filename>
  <database/>
  <annotation/>
  <image/>
  <size>
    <height>338</height>
    <width>500</width>
    <depth>3</depth>
  </size>
  <segmented/>
  <object>
    <name>person</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>191</xmin>
      <ymin>107</ymin>
      <xmax>313</xmax>
      <ymax>329</ymax>
    </bndbox>
  </object>
  <object>
    <name>person</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>365</xmin>
      <ymin>83</ymin>
      <xmax>500</xmax>
      <ymax>333</ymax>
    </bndbox>
  </object>
</annotation>
<annotation>
  <folder/>
  <filename>2011_000006.jpg</filename>
  <database/>
  <annotation/>
  <image/>
  <size>
    <height>375</height>
    <width>500</width>
    <depth>3</depth>
  </size>
  <segmented/>
  <object>
    <name>person</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>91</xmin>
      <ymin>107</ymin>
      <xmax>240</xmax>
      <ymax>330</ymax>
    </bndbox>
  </object>
  <object>
    <name>person</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>178</xmin>
      <ymin>110</ymin>
      <xmax>298</xmax>
      <ymax>282</ymax>
    </bndbox>
  </object>
  <object>
    <name>person</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>254</xmin>
      <ymin>115</ymin>
      <xmax>369</xmax>
      <ymax>292</ymax>
    </bndbox>
  </object>
  <object>
    <name>person</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>395</xmin>
      <ymin>81</ymin>
      <xmax>447</xmax>
      <ymax>117</ymax>
    </bndbox>
  </object>
</annotation>
<annotation>
  <folder/>
  <filename>2011_000025.jpg</filename>
  <database/>
  <annotation/>
  <image/>
  <size>
    <height>375</height>
    <width>500</width>
    <depth>3</depth>
  </size>
  <segmented/>
  <object>
    <name>bus</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>84</xmin>
      <ymin>20</ymin>
      <xmax>435</xmax>
      <ymax>373</ymax>
    </bndbox>
  </object>
  <object>
    <name>bus</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>1</xmin>
      <ymin>99</ymin>
      <xmax>107</xmax>
      <ymax>282</ymax>
    </bndbox>
  </object>
  <object>
    <name>car</name>
    <pose/>
    <truncated/>
    <difficult/>
    <bndbox>
      <xmin>409</xmin>
      <ymin>167</ymin>
      <xmax>500</xmax>
      <ymax>266</ymax>
    </bndbox>
  </object>
</annotation>
__ignore__
_background_
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
potted plant
sheep
sofa
train
tv/monitor
\ No newline at end of file
#!/usr/bin/env python

from __future__ import print_function

import argparse
import glob
import json
import os
import os.path as osp

import lxml.builder
import lxml.etree
import numpy as np
import PIL.Image

import labelme


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('labels_file')
    parser.add_argument('in_dir', help='input dir with annotated files')
    parser.add_argument('out_dir', help='output dataset directory')
    args = parser.parse_args()

    if osp.exists(args.out_dir):
        print('Output directory already exists:', args.out_dir)
        quit(1)
    os.makedirs(args.out_dir)
    os.makedirs(osp.join(args.out_dir, 'JPEGImages'))
    os.makedirs(osp.join(args.out_dir, 'Annotations'))
    os.makedirs(osp.join(args.out_dir, 'AnnotationsVisualization'))
    print('Creating dataset:', args.out_dir)

    class_names = []
    class_name_to_id = {}
    for i, line in enumerate(open(args.labels_file).readlines()):
        class_id = i - 1  # starts with -1
        class_name = line.strip()
        class_name_to_id[class_name] = class_id
        if class_id == -1:
            assert class_name == '__ignore__'
            continue
        elif class_id == 0:
            assert class_name == '_background_'
        class_names.append(class_name)
    class_names = tuple(class_names)
    print('class_names:', class_names)
    out_class_names_file = osp.join(args.out_dir, 'class_names.txt')
    with open(out_class_names_file, 'w') as f:
        f.writelines('\n'.join(class_names))
    print('Saved class_names:', out_class_names_file)

    for label_file in glob.glob(osp.join(args.in_dir, '*.json')):
        print('Generating dataset from:', label_file)
        with open(label_file) as f:
            data = json.load(f)

        base = osp.splitext(osp.basename(label_file))[0]
        out_img_file = osp.join(
            args.out_dir, 'JPEGImages', base + '.jpg')
        out_xml_file = osp.join(
            args.out_dir, 'Annotations', base + '.xml')
        out_viz_file = osp.join(
            args.out_dir, 'AnnotationsVisualization', base + '.jpg')

        img_file = osp.join(osp.dirname(label_file), data['imagePath'])
        img = np.asarray(PIL.Image.open(img_file))
        PIL.Image.fromarray(img).save(out_img_file)

        maker = lxml.builder.ElementMaker()
        xml = maker.annotation(
            maker.folder(),
            maker.filename(base + '.jpg'),
            maker.database(),    # e.g., The VOC2007 Database
            maker.annotation(),  # e.g., Pascal VOC2007
            maker.image(),       # e.g., flickr
            maker.size(
                maker.height(str(img.shape[0])),
                maker.width(str(img.shape[1])),
                maker.depth(str(img.shape[2])),
            ),
            maker.segmented(),
        )

        bboxes = []
        labels = []
        for shape in data['shapes']:
            if shape['shape_type'] != 'rectangle':
                print('Skipping shape: label={label}, shape_type={shape_type}'
                      .format(**shape))
                continue

            class_name = shape['label']
            class_id = class_names.index(class_name)

            (xmin, ymin), (xmax, ymax) = shape['points']
            bboxes.append([xmin, ymin, xmax, ymax])
            labels.append(class_id)

            xml.append(
                maker.object(
                    maker.name(shape['label']),
                    maker.pose(),
                    maker.truncated(),
                    maker.difficult(),
                    maker.bndbox(
                        maker.xmin(str(xmin)),
                        maker.ymin(str(ymin)),
                        maker.xmax(str(xmax)),
                        maker.ymax(str(ymax)),
                    ),
                )
            )

        captions = [class_names[l] for l in labels]
        viz = labelme.utils.draw_instances(
            img, bboxes, labels, captions=captions
        )
        PIL.Image.fromarray(viz).save(out_viz_file)

        with open(out_xml_file, 'wb') as f:
            f.write(lxml.etree.tostring(xml, pretty_print=True))


if __name__ == '__main__':
    main()
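For context (not part of the commit): the converter above only relies on a few fields of each labelme JSON file. Below is a hedged sketch of that input structure; the field names come straight from the script, while the concrete values are assumptions that mirror the first annotation shown earlier.

```python
# Illustrative only: the labelme JSON fields that labelme2voc.py reads.
example_json = {
    "imagePath": "2011_000003.jpg",    # image stored next to the .json file
    "shapes": [
        {
            "label": "person",         # must be listed in labels.txt
            "shape_type": "rectangle", # other shape types are skipped
            "points": [
                [191.0, 107.0],        # top-left corner  -> xmin, ymin
                [313.0, 329.0],        # bottom-right     -> xmax, ymax
            ],
        },
    ],
}
```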
......@@ -10,6 +10,7 @@ from .shape import polygons_to_mask
from .shape import shape_to_mask
from .shape import shapes_to_label
from .draw import draw_instances
from .draw import draw_label
from .draw import label_colormap
from .draw import label2rgb
......
import io
import os.path as osp

import numpy as np
import PIL.Image
import PIL.ImageDraw
import PIL.ImageFont


def label_colormap(N=256):
......@@ -116,3 +118,40 @@ def draw_label(label, img=None, label_names=None, colormap=None, **kwargs):
    out = PIL.Image.open(f).resize(out_size, PIL.Image.BILINEAR).convert('RGB')
    out = np.asarray(out)
    return out


def draw_instances(
    image=None,
    bboxes=None,
    labels=None,
    masks=None,
    captions=None,
):
    import matplotlib

    # TODO(wkentaro)
    assert image is not None
    assert bboxes is not None
    assert labels is not None
    assert masks is None
    assert captions is not None

    viz = PIL.Image.fromarray(image)
    draw = PIL.ImageDraw.ImageDraw(viz)

    font_path = osp.join(
        osp.dirname(matplotlib.__file__),
        'mpl-data/fonts/ttf/DejaVuSans.ttf'
    )
    font = PIL.ImageFont.truetype(font_path)

    colormap = label_colormap(255)
    for bbox, label, caption in zip(bboxes, labels, captions):
        color = colormap[label]
        color = tuple((color * 255).astype(np.uint8).tolist())

        xmin, ymin, xmax, ymax = bbox
        draw.rectangle((xmin, ymin, xmax, ymax), outline=color)
        draw.text((xmin, ymin), caption, font=font)

    return np.asarray(viz)
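A minimal usage sketch of the new `labelme.utils.draw_instances` helper (not part of this commit; the image path and the label index are assumptions based on the example data above):

```python
# Sketch: draw one bounding box with a caption using draw_instances.
# Assumes an RGB image from the example data; 15 is the index of 'person'
# in the class_names tuple that labelme2voc.py builds from labels.txt.
import numpy as np
import PIL.Image

import labelme

img = np.asarray(PIL.Image.open('data_annotated/2011_000003.jpg'))
viz = labelme.utils.draw_instances(
    image=img,
    bboxes=[[191, 107, 313, 329]],
    labels=[15],              # class index used to pick the box color
    captions=['person'],      # text drawn at the top-left corner of the box
)
PIL.Image.fromarray(viz).save('2011_000003_viz.jpg')
```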
......@@ -19,6 +19,7 @@ del here
install_requires = [
    'lxml',
    'matplotlib',
    'numpy',
    'Pillow>=2.8.0',
......