Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
7bb8630e
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
7bb8630e
编写于
10月 28, 2019
作者:
S
SunAhong1993
提交者:
GitHub
10月 28, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add x2coco.py (#3788)
* add x2coco.py * Update DATA.md * Update DATA_cn.md
上级
160b7fa2
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
66 addition
and
29 deletion
+66
-29
PaddleCV/PaddleDetection/docs/DATA.md
PaddleCV/PaddleDetection/docs/DATA.md
+6
-4
PaddleCV/PaddleDetection/docs/DATA_cn.md
PaddleCV/PaddleDetection/docs/DATA_cn.md
+5
-3
PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
PaddleCV/PaddleDetection/ppdet/data/tools/x2coco.py
+55
-22
未找到文件。
PaddleCV/PaddleDetection/docs/DATA.md
浏览文件 @
7bb8630e
...
...
@@ -181,16 +181,18 @@ whole data pipeline is fully customizable through the yaml configuration files.
#### Custom Datasets
-
Option 1: Convert the dataset to COCO
or VOC
format.
-
Option 1: Convert the dataset to COCO format.
```
sh
# a small utility (`tools/labelme2coco.py`) is provided to convert
# Labelme-annotated dataset to COCO format.
python ./ppdet/data/tools/labelme2coco.py
--json_input_dir
./labelme_annos/
# a small utility (`tools/x2coco.py`) is provided to convert
# Labelme-annotated dataset or cityscape dataset to COCO format.
python ./ppdet/data/tools/x2coco.py
--dataset_type
labelme
--json_input_dir
./labelme_annos/
--image_input_dir
./labelme_imgs/
--output_dir
./cocome/
--train_proportion
0.8
--val_proportion
0.2
--test_proportion
0.0
# --dataset_type: The format of the dataset to be converted. Currently supported: 'labelme' and 'cityscape'.
# --json_input_dir: The path of the JSON files annotated by Labelme.
# --image_input_dir: The path of the images.
# --output_dir: The path of the converted COCO dataset.
...
...
PaddleCV/PaddleDetection/docs/DATA_cn.md
浏览文件 @
7bb8630e
...
...
@@ -165,15 +165,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
```
#### 如何使用自定义数据集?
-
选择1:将数据集转换为
VOC格式或者
COCO格式。
-
选择1:将数据集转换为COCO格式。
```
# 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集
python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
# 在./tools/中提供了x2coco.py用于将labelme标注的数据集或cityscape数据集转换为COCO数据集
python ./ppdet/data/tools/x2coco.py --dataset_type labelme
--json_input_dir ./labelme_annos/
--image_input_dir ./labelme_imgs/
--output_dir ./cocome/
--train_proportion 0.8
--val_proportion 0.2
--test_proportion 0.0
# --dataset_type:需要转换的数据格式,目前支持:'labelme' 和 'cityscape'
# --json_input_dir:使用labelme标注的json文件所在文件夹
# --image_input_dir:图像文件所在文件夹
# --output_dir:转换后的COCO格式数据集存放位置
...
...
PaddleCV/PaddleDetection/ppdet/data/tools/
labelme
2coco.py
→
PaddleCV/PaddleDetection/ppdet/data/tools/
x
2coco.py
浏览文件 @
7bb8630e
...
...
@@ -44,7 +44,7 @@ def getbbox(self, points):
return
self
.
mask2box
(
mask
)
def
images
(
data
,
num
):
def
images
_labelme
(
data
,
num
):
image
=
{}
image
[
'height'
]
=
data
[
'imageHeight'
]
image
[
'width'
]
=
data
[
'imageWidth'
]
...
...
@@ -52,6 +52,14 @@ def images(data, num):
image
[
'file_name'
]
=
data
[
'imagePath'
].
split
(
'/'
)[
-
1
]
return
image
def images_cityscape(data, num, img_file):
    """Build the COCO ``images`` entry for one cityscape annotation file.

    Args:
        data: Parsed annotation JSON; the ``imgHeight`` and ``imgWidth``
            keys are read from it.
        num: Index of the image; the stored ``id`` is ``num + 1``.
        img_file: Value stored unchanged as ``file_name``.

    Returns:
        A dict with the keys ``height``, ``width``, ``id`` and ``file_name``.
    """
    return {
        'height': data['imgHeight'],
        'width': data['imgWidth'],
        'id': num + 1,
        'file_name': img_file,
    }
def
categories
(
label
,
labels_list
):
category
=
{}
...
...
@@ -112,7 +120,7 @@ def get_bbox(height, width, points):
]
def
deal_json
(
img_path
,
json_path
):
def
deal_json
(
ds_type
,
img_path
,
json_path
):
data_coco
=
{}
label_to_num
=
{}
images_list
=
[]
...
...
@@ -120,34 +128,52 @@ def deal_json(img_path, json_path):
annotations_list
=
[]
labels_list
=
[]
image_num
=
-
1
object_num
=
-
1
for
img_file
in
os
.
listdir
(
img_path
):
img_label
=
img_file
.
split
(
'.'
)[
0
]
if
img_file
.
split
(
'.'
)[
-
1
]
not
in
[
'bmp'
,
'jpg'
,
'jpeg'
,
'png'
,
'JPEG'
,
'JPG'
,
'PNG'
]:
continue
label_file
=
osp
.
join
(
json_path
,
img_label
+
'.json'
)
print
(
'Generating dataset from:'
,
label_file
)
image_num
=
image_num
+
1
with
open
(
label_file
)
as
f
:
data
=
json
.
load
(
f
)
images_list
.
append
(
images
(
data
,
image_num
))
object_num
=
-
1
for
shapes
in
data
[
'shapes'
]:
object_num
=
object_num
+
1
label
=
shapes
[
'label'
]
if
label
not
in
labels_list
:
categories_list
.
append
(
categories
(
label
,
labels_list
))
labels_list
.
append
(
label
)
label_to_num
[
label
]
=
len
(
labels_list
)
points
=
shapes
[
'points'
]
p_type
=
shapes
[
'shape_type'
]
if
p_type
==
'polygon'
:
annotations_list
.
append
(
annotations_polygon
(
data
[
'imageHeight'
],
data
[
'imageWidth'
],
points
,
label
,
image_num
,
object_num
,
label_to_num
))
if
ds_type
==
'labelme'
:
images_list
.
append
(
images_labelme
(
data
,
image_num
))
elif
ds_type
==
'cityscape'
:
images_list
.
append
(
images_cityscape
(
data
,
image_num
,
img_file
))
if
ds_type
==
'labelme'
:
for
shapes
in
data
[
'shapes'
]:
object_num
=
object_num
+
1
label
=
shapes
[
'label'
]
if
label
not
in
labels_list
:
categories_list
.
append
(
categories
(
label
,
labels_list
))
labels_list
.
append
(
label
)
label_to_num
[
label
]
=
len
(
labels_list
)
points
=
shapes
[
'points'
]
p_type
=
shapes
[
'shape_type'
]
if
p_type
==
'polygon'
:
annotations_list
.
append
(
annotations_polygon
(
data
[
'imageHeight'
],
data
[
'imageWidth'
],
points
,
label
,
image_num
,
object_num
,
label_to_num
))
if
p_type
==
'rectangle'
:
points
.
append
([
points
[
0
][
0
],
points
[
1
][
1
]])
points
.
append
([
points
[
1
][
0
],
points
[
0
][
1
]])
if
p_type
==
'rectangle'
:
points
.
append
([
points
[
0
][
0
],
points
[
1
][
1
]])
points
.
append
([
points
[
1
][
0
],
points
[
0
][
1
]])
annotations_list
.
append
(
annotations_rectangle
(
points
,
label
,
image_num
,
object_num
,
label_to_num
))
elif
ds_type
==
'cityscape'
:
for
shapes
in
data
[
'objects'
]:
object_num
=
object_num
+
1
label
=
shapes
[
'label'
]
if
label
not
in
labels_list
:
categories_list
.
append
(
categories
(
label
,
labels_list
))
labels_list
.
append
(
label
)
label_to_num
[
label
]
=
len
(
labels_list
)
points
=
shapes
[
'polygon'
]
annotations_list
.
append
(
annotations_rectangle
(
points
,
label
,
image_num
,
object_num
,
label_to_num
))
annotations_polygon
(
data
[
'imgHeight'
],
data
[
'imgWidth'
],
points
,
label
,
image_num
,
object_num
,
label_to_num
))
data_coco
[
'images'
]
=
images_list
data_coco
[
'categories'
]
=
categories_list
data_coco
[
'annotations'
]
=
annotations_list
...
...
@@ -157,6 +183,7 @@ def deal_json(img_path, json_path):
def
main
():
parser
=
argparse
.
ArgumentParser
(
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
)
parser
.
add_argument
(
'--dataset_type'
,
help
=
'the type of dataset'
)
parser
.
add_argument
(
'--json_input_dir'
,
help
=
'input annotated directory'
)
parser
.
add_argument
(
'--image_input_dir'
,
help
=
'image directory'
)
parser
.
add_argument
(
...
...
@@ -177,6 +204,11 @@ def main():
type
=
float
,
default
=
0.0
)
args
=
parser
.
parse_args
()
try
:
assert
args
.
dataset_type
in
[
'labelme'
,
'cityscape'
]
except
AssertionError
as
e
:
print
(
'Now only support the cityscape dataset and labelme dataset!!'
)
os
.
_exit
(
0
)
try
:
assert
os
.
path
.
exists
(
args
.
json_input_dir
)
except
AssertionError
as
e
:
...
...
@@ -234,7 +266,8 @@ def main():
if
not
os
.
path
.
exists
(
args
.
output_dir
+
'/annotations'
):
os
.
makedirs
(
args
.
output_dir
+
'/annotations'
)
if
args
.
train_proportion
!=
0
:
train_data_coco
=
deal_json
(
args
.
output_dir
+
'/train'
,
train_data_coco
=
deal_json
(
args
.
dataset_type
,
args
.
output_dir
+
'/train'
,
args
.
json_input_dir
)
train_json_path
=
osp
.
join
(
args
.
output_dir
+
'/annotations'
,
'instance_train.json'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录