PaddlePaddle / PaddleDetection
Commit 23d3745d
Authored on Oct 28, 2019 by root

Merge branch 'ppdet_split' of /paddle/work/paddle-fork/models into init_ppdet

Parents: db2d30dd, 27545a84

Showing 22 changed files with 466 additions and 207 deletions (+466 -207)
configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml          +18  -0
configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml  +17  -0
configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml            +40  -0
configs/ssd/ssd_mobilenet_v1_voc.yml                                 +2   -4
configs/ssd/ssd_vgg16_300_voc.yml                                    +2   -4
configs/ssd/ssd_vgg16_512_voc.yml                                    +2   -4
configs/yolov3_darknet_voc.yml                                       +2   -4
configs/yolov3_mobilenet_v1_fruit.yml                                +2   -5
configs/yolov3_mobilenet_v1_voc.yml                                  +2   -4
configs/yolov3_r34_voc.yml                                           +2   -4
dataset/voc/create_list.py                                           +25  -0
dataset/voc/label_list.txt                                           +20  -0
docs/DATA.md                                                         +39  -28
docs/DATA_cn.md                                                      +40  -31
docs/INSTALL.md                                                      +63  -0
docs/INSTALL_cn.md                                                   +62  -0
ppdet/data/data_feed.py                                              +8   -9
ppdet/data/source/roidb_source.py                                    +1   -1
ppdet/data/source/voc_loader.py                                      +13  -25
ppdet/data/tools/x2coco.py                                           +55  -22
ppdet/utils/download.py                                              +34  -32
ppdet/utils/voc_utils.py                                             +17  -30
configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x.yml  (+18 -0)

@@ -233,6 +233,24 @@ MaskRCNNTestFeed:
   batch_size: 1
   dataset:
     annotation: dataset/coco/annotations/instances_val2017.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
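The added test-feed pipeline decodes to BGR (`to_rgb: False`), subtracts the per-channel BGR means with no 1/255 scaling (`is_scale: False`), then permutes HWC to CHW. A minimal numpy sketch of the same arithmetic (the input shape and dtype are assumptions for illustration, not values from the config):

```python
import numpy as np

# Illustrative stand-in for the !NormalizeImage + !Permute steps above;
# the 800x1333 shape is an assumption for demonstration only.
img = np.random.randint(0, 256, (800, 1333, 3)).astype('float32')  # HWC, BGR
mean = np.array([102.9801, 115.9465, 122.7717], dtype='float32')
std = np.array([1.0, 1.0, 1.0], dtype='float32')
img = (img - mean) / std        # is_scale: False, so no division by 255 first
img = img.transpose((2, 0, 1))  # channel_first: true -> CHW; to_bgr: false
print(img.shape)                # (3, 800, 1333)
```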
configs/dcn/cascade_mask_rcnn_dcnv2_se154_vd_fpn_gn_s1x_ms_test.yml  (+17 -0)

@@ -249,6 +249,23 @@ MaskRCNNTestFeed:
   batch_size: 1
   dataset:
     annotation: dataset/coco/annotations/instances_val2017.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
configs/obj365/cascade_rcnn_dcnv2_se154_vd_fpn_gn_cas.yml  (+40 -0)

@@ -192,6 +192,28 @@ FasterRCNNEvalFeed:
     dataset_dir: dataset/objects365
     annotation: annotations/val.json
     image_dir: val
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !ResizeImage
+    target_size: 800
+    max_size: 1333
+    interp: 1
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
@@ -200,6 +222,24 @@ FasterRCNNTestFeed:
   batch_size: 1
   dataset:
     annotation: dataset/obj365/annotations/val.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
configs/ssd/ssd_mobilenet_v1_voc.yml  (+2 -4)

@@ -61,8 +61,7 @@ SSDTrainFeed:
   use_process: true
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
+    annotation: trainval.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true

 SSDEvalFeed:
@@ -70,8 +69,7 @@ SSDEvalFeed:
   use_process: true
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
+    annotation: test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   drop_last: false
configs/ssd/ssd_vgg16_300_voc.yml  (+2 -4)

@@ -64,8 +64,7 @@ SSDTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
+    annotation: trainval.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   image_shape: [3, 300, 300]
   sample_transforms:
@@ -109,8 +108,7 @@ SSDEvalFeed:
   batch_size: 32
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
+    annotation: test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   drop_last: false
   image_shape: [3, 300, 300]
configs/ssd/ssd_vgg16_512_voc.yml  (+2 -4)

@@ -68,8 +68,7 @@ SSDTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
+    annotation: trainval.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   image_shape: [3, 512, 512]
   sample_transforms:
@@ -113,8 +112,7 @@ SSDEvalFeed:
   batch_size: 32
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
+    annotation: test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   drop_last: false
   image_shape: [3, 512, 512]
configs/yolov3_darknet_voc.yml  (+2 -4)

@@ -62,8 +62,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
+    annotation: trainval.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -75,8 +74,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
+    annotation: test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true

 YoloTestFeed:
configs/yolov3_mobilenet_v1_fruit.yml  (+2 -5)

@@ -64,8 +64,7 @@ YoloTrainFeed:
   batch_size: 1
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/train.txt
+    annotation: train.txt
-    image_dir: ./JPEGImages
     use_default_label: false
   num_workers: 16
   bufsize: 128
@@ -111,8 +110,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/val.txt
+    annotation: val.txt
-    image_dir: ./JPEGImages
     use_default_label: false
@@ -121,5 +119,4 @@ YoloTestFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/label_list.txt
     use_default_label: false
configs/yolov3_mobilenet_v1_voc.yml  (+2 -4)

@@ -63,8 +63,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
+    annotation: trainval.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -76,8 +75,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
+    annotation: test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true

 YoloTestFeed:
configs/yolov3_r34_voc.yml  (+2 -4)

@@ -65,8 +65,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
+    annotation: trainval.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -78,8 +77,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
+    annotation: test.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
     use_default_label: true

 YoloTestFeed:
dataset/voc/create_list.py  (new file, mode 100644, +25 -0)

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import os.path as osp
import logging

from ppdet.utils.download import create_voc_list

logging.basicConfig(level=logging.INFO)

voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
create_voc_list(voc_path)
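The script resolves its own directory and delegates to `create_voc_list` from `ppdet/utils/download.py`, which writes `trainval.txt` and `test.txt` beside it. A hedged sketch of consuming those lists afterwards (the sample paths are illustrative, not taken from the diff):

```python
# Hypothetical check of the lists produced under dataset/voc/ after running
# create_list.py; each line pairs an image path with its annotation path,
# both relative to dataset/voc/.
import os.path as osp

voc_root = 'dataset/voc'  # assumed location, matching VOC_DATASET_DIR below
for list_name in ('trainval.txt', 'test.txt'):
    list_file = osp.join(voc_root, list_name)
    if not osp.isfile(list_file):
        continue
    with open(list_file) as f:
        for line in f:
            img_path, xml_path = line.strip().split()[:2]
            # e.g. VOCdevkit/VOC2007/JPEGImages/001789.jpg
            #      VOCdevkit/VOC2007/Annotations/001789.xml
            print(osp.join(voc_root, img_path), osp.join(voc_root, xml_path))
```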
dataset/voc/label_list.txt  (new file, mode 100644, +20 -0)

aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
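
When `use_default_label=False`, the loader reads this file to build its category mapping; background takes id 0 when `with_background` is true, per the comment in `voc_loader.py`. A minimal sketch of the mapping this file implies (`with_background` is an assumption here):

```python
# Build the category-name -> class-id mapping implied by label_list.txt,
# following the "background:0, first_class:1, ..." convention noted in
# ppdet/data/source/voc_loader.py.
with_background = True  # assumed setting for illustration
with open('dataset/voc/label_list.txt') as f:
    names = [line.strip() for line in f if line.strip()]
cname2cid = {name: i + int(with_background) for i, name in enumerate(names)}
print(cname2cid['aeroplane'])  # 1 when with_background is True
```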
docs/DATA.md  (+39 -28)

@@ -27,6 +27,7 @@ Parses various data sources and creates `data.Dataset` instances. Currently,
 following data sources are supported:

 - COCO data source
+
 Loads `COCO` type datasets with directory structures like this:

 ```
@@ -36,46 +37,54 @@ Loads `COCO` type datasets with directory structures like this:
   │   ├── instances_train2017.json
   │   ├── instances_val2014.json
   │   ├── instances_val2017.json
-  |   ...
+  │   |   ...
   ├── train2017
   │   ├── 000000000009.jpg
   │   ├── 000000580008.jpg
-  |   ...
+  │   |   ...
   ├── val2017
   │   ├── 000000000139.jpg
   │   ├── 000000000285.jpg
-  │   |   ...
   |   ...
 ```

 - Pascal VOC data source

 Loads `Pascal VOC` like datasets with directory structure like this:

 ```
-data/pascalvoc/
-├──Annotations
-│   ├── i000050.jpg
-│   ├── 003876.xml
-|   ...
-├── ImageSets
-│   ├──Main
-        └── train.txt
-        └── val.txt
-        └── test.txt
-        └── dog_train.txt
-        └── dog_trainval.txt
-        └── dog_val.txt
-        └── dog_test.txt
-        └── ...
-│   ├──Layout
-        └──...
-│   ├── Segmentation
-        └──...
-├── JPEGImages
-│   ├── 000050.jpg
-│   ├── 003876.jpg
-|   ...
+dataset/voc/
+├── train.txt
+├── val.txt
+├── test.txt
+├── label_list.txt (optional)
+├── VOCdevkit/VOC2007
+│   ├── Annotations
+│       ├── 001789.xml
+│       |   ...
+│   ├── JPEGImages
+│       ├── 001789.xml
+│       |   ...
+│   ├── ImageSets
+│       |   ...
+├── VOCdevkit/VOC2012
+│   ├── Annotations
+│       ├── 003876.xml
+│       |   ...
+│   ├── JPEGImages
+│       ├── 003876.xml
+│       |   ...
+│   ├── ImageSets
+│       |   ...
+|   ...
 ```

+**NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt`
+of Pascal VOC dataset will be read, otherwise, `label_list.txt` is unnecessary and
+the default Pascal VOC label list which defined in
+[voc\_loader.py](../ppdet/data/source/voc_loader.py) will be used.
+
 - Roidb data source

 A generalized data source serialized as pickle files, which have the following
 structure:
@@ -181,16 +190,18 @@ whole data pipeline is fully customizable through the yaml configuration files.

 #### Custom Datasets

-- Option 1: Convert the dataset to COCO or VOC format.
+- Option 1: Convert the dataset to COCO format.

 ```sh
-# a small utility (`tools/labelme2coco.py`) is provided to convert
-# Labelme-annotated dataset to COCO format.
+# a small utility (`tools/x2coco.py`) is provided to convert
+# Labelme-annotated dataset or cityscape dataset to COCO format.
-python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                    --json_input_dir ./labelme_annos/
                                     --image_input_dir ./labelme_imgs/
                                     --output_dir ./cocome/
                                     --train_proportion 0.8
                                     --val_proportion 0.2
                                     --test_proportion 0.0
+# --dataset_type: The data format which is need to be converted. Currently supported are: 'labelme' and 'cityscape'
 # --json_input_dir:The path of json files which are annotated by Labelme.
 # --image_input_dir:The path of images.
 # --output_dir:The path of coverted COCO dataset.
```
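The three `*_proportion` flags partition the image set before conversion. A hedged sketch of such a split (the rounding policy here is an assumption for illustration, not taken from `x2coco.py`):

```python
import math

def split_dataset(files, train_p=0.8, val_p=0.2, test_p=0.0):
    # Illustrative only: partition a file list by the proportions that
    # x2coco.py accepts on the command line.
    assert abs(train_p + val_p + test_p - 1.0) < 1e-6
    n = len(files)
    n_train = int(math.floor(n * train_p))
    n_val = int(math.floor(n * val_p))
    return (files[:n_train],
            files[n_train:n_train + n_val],
            files[n_train + n_val:])

train, val, test = split_dataset(['a.jpg', 'b.jpg', 'c.jpg', 'd.jpg', 'e.jpg'])
print(len(train), len(val), len(test))  # 4 1 0
```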
docs/DATA_cn.md  (+40 -31)

@@ -11,9 +11,12 @@
 子功能介绍:

 1. 数据解析

 数据解析得到的是`data.Dataset`,实现逻辑位于`data.source`中。通过它可以实现解析不同格式的数据集,已支持的数据源包括:

 - COCO数据源
-该数据集目前分为COCO2012和COCO2017,主要由json文件和image文件组成,其组织结构如下所示:
+
+该数据集目前分为COCO2014和COCO2017,主要由json文件和image文件组成,其组织结构如下所示:

 ```
 dataset/coco/
@@ -22,49 +25,53 @@
   │   ├── instances_train2017.json
   │   ├── instances_val2014.json
   │   ├── instances_val2017.json
-  |   ...
+  │   |   ...
   ├── train2017
   │   ├── 000000000009.jpg
   │   ├── 000000580008.jpg
-  |   ...
+  │   |   ...
   ├── val2017
   │   ├── 000000000139.jpg
   │   ├── 000000000285.jpg
-  │   |   ...
   |   ...
 ```

 - Pascal VOC数据源

 该数据集目前分为VOC2007和VOC2012,主要由xml文件和image文件组成,其组织结构如下所示:

 ```
-data/pascalvoc/
-├──Annotations
-│   ├── i000050.jpg
-│   ├── 003876.xml
-|   ...
-├── ImageSets
-│   ├──Main
-        └── train.txt
-        └── val.txt
-        └── test.txt
-        └── dog_train.txt
-        └── dog_trainval.txt
-        └── dog_val.txt
-        └── dog_test.txt
-        └── ...
-│   ├──Layout
-        └──...
-│   ├── Segmentation
-        └──...
-├── JPEGImages
-│   ├── 000050.jpg
-│   ├── 003876.jpg
-|   ...
+dataset/voc/
+├── train.txt
+├── val.txt
+├── test.txt
+├── label_list.txt (optional)
+├── VOCdevkit/VOC2007
+│   ├── Annotations
+│       ├── 001789.xml
+│       |   ...
+│   ├── JPEGImages
+│       ├── 001789.xml
+│       |   ...
+│   ├── ImageSets
+│       |   ...
+├── VOCdevkit/VOC2012
+│   ├── Annotations
+│       ├── 003876.xml
+│       |   ...
+│   ├── JPEGImages
+│       ├── 003876.xml
+│       |   ...
+│   ├── ImageSets
+│       |   ...
+|   ...
 ```

+**说明:** 如果你在yaml配置文件中设置`use_default_label=False`, 将从`label_list.txt`
+中读取类别列表,反之则可以没有`label_list.txt`文件,检测库会使用Pascal VOC数据集的默
+认类别列表,默认类别列表定义在[voc\_loader.py](../ppdet/data/source/voc_loader.py)
+
 - Roidb数据源

 该数据集主要由COCO数据集和Pascal VOC数据集转换而成的pickle文件,包含一个dict,而dict中只包含一个命名为'records'的list(可能还有一个命名为'cname2cid'的字典),其内容如下所示:
@@ -165,15 +172,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
 ```

 #### 如何使用自定义数据集?

-- 选择1:将数据集转换为VOC格式或者COCO格式。
+- 选择1:将数据集转换为COCO格式。

 ```
-# 在./tools/中提供了labelme2coco.py用于将labelme标注的数据集转换为COCO数据集
+# 在./tools/中提供了x2coco.py用于将labelme标注的数据集或cityscape数据集转换为COCO数据集
-python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                --json_input_dir ./labelme_annos/
                 --image_input_dir ./labelme_imgs/
                 --output_dir ./cocome/
                 --train_proportion 0.8
                 --val_proportion 0.2
                 --test_proportion 0.0
+# --dataset_type:需要转换的数据格式,目前支持:'labelme'和'cityscape'
 # --json_input_dir:使用labelme标注的json文件所在文件夹
 # --image_input_dir:图像文件所在文件夹
 # --output_dir:转换后的COCO格式数据集存放位置
```
docs/INSTALL.md  (+63 -0)

@@ -111,6 +111,13 @@ ln -sf <path/to/coco> <path/to/paddle_detection>/dataset/coco
 ln -sf <path/to/voc> <path/to/paddle_detection>/dataset/voc
 ```

+For Pascal VOC dataset, you should create file list by:
+
+```
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/create_list.py
+```
+
 **Download datasets manually:**

 On the other hand, to download the datasets, run the following commands:
@@ -122,13 +129,69 @@ export PYTHONPATH=$PYTHONPATH:.
 python dataset/coco/download_coco.py
 ```

+`COCO` dataset with directory structures like this:
+
+  ```
+  dataset/coco/
+  ├── annotations
+  │   ├── instances_train2014.json
+  │   ├── instances_train2017.json
+  │   ├── instances_val2014.json
+  │   ├── instances_val2017.json
+  │   |   ...
+  ├── train2017
+  │   ├── 000000000009.jpg
+  │   ├── 000000580008.jpg
+  │   |   ...
+  ├── val2017
+  │   ├── 000000000139.jpg
+  │   ├── 000000000285.jpg
+  │   |   ...
+  |   ...
+  ```
+
 - Pascal VOC

 ```
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/voc/download_voc.py
+python dataset/voc/create_list.py
 ```

+`Pascal VOC` dataset with directory structure like this:
+
+  ```
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  |   ...
+  ```
+
+**NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt`
+of Pascal VOC dataset will be read, otherwise, `label_list.txt` is unnecessary and
+the default Pascal VOC label list which defined in
+[voc\_loader.py](../ppdet/data/source/voc_loader.py) will be used.
+
 **Download datasets automatically:**

 If a training session is started but the dataset is not setup properly (e.g,
docs/INSTALL_cn.md  (+62 -0)

@@ -108,6 +108,13 @@ ln -sf <path/to/coco> <path/to/paddle_detection>/dataset/coco
 ln -sf <path/to/voc> <path/to/paddle_detection>/dataset/voc
 ```

+对于Pascal VOC数据集,需通过如下命令创建文件列表:
+
+```
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/create_list.py
+```
+
 **手动下载数据集:**

 若您本地没有数据集,可通过如下命令下载:
@@ -119,13 +126,68 @@ export PYTHONPATH=$PYTHONPATH:.
 python dataset/coco/download_coco.py
 ```

+`COCO` 数据集目录结构如下:
+
+  ```
+  dataset/coco/
+  ├── annotations
+  │   ├── instances_train2014.json
+  │   ├── instances_train2017.json
+  │   ├── instances_val2014.json
+  │   ├── instances_val2017.json
+  │   |   ...
+  ├── train2017
+  │   ├── 000000000009.jpg
+  │   ├── 000000580008.jpg
+  │   |   ...
+  ├── val2017
+  │   ├── 000000000139.jpg
+  │   ├── 000000000285.jpg
+  │   |   ...
+  |   ...
+  ```
+
 - Pascal VOC

 ```
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/voc/download_voc.py
+python dataset/voc/create_list.py
 ```

+`Pascal VOC` 数据集目录结构如下:
+
+  ```
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  |   ...
+  ```
+
+**说明:** 如果你在yaml配置文件中设置`use_default_label=False`, 将从`label_list.txt`
+中读取类别列表,反之则可以没有`label_list.txt`文件,检测库会使用Pascal VOC数据集的默
+认类别列表,默认类别列表定义在[voc\_loader.py](../ppdet/data/source/voc_loader.py)
+
 **自动下载数据集:**

 若您在数据集未成功设置(例如,在`dataset/coco`或`dataset/voc`中找不到)的情况下开始运行,
ppdet/data/data_feed.py  (+8 -9)

@@ -219,7 +219,7 @@ class DataSet(object):
     def __init__(self,
                  annotation,
-                 image_dir,
+                 image_dir=None,
                  dataset_dir=None,
                  use_default_label=None):
         super(DataSet, self).__init__()
@@ -229,7 +229,7 @@ class DataSet(object):
         self.use_default_label = use_default_label

-COCO_DATASET_DIR = 'coco'
+COCO_DATASET_DIR = 'dataset/coco'
 COCO_TRAIN_ANNOTATION = 'annotations/instances_train2017.json'
 COCO_TRAIN_IMAGE_DIR = 'train2017'
 COCO_VAL_ANNOTATION = 'annotations/instances_val2017.json'
@@ -246,12 +246,11 @@ class CocoDataSet(DataSet):
             dataset_dir=dataset_dir,
             annotation=annotation,
             image_dir=image_dir)

-VOC_DATASET_DIR = 'pascalvoc'
+VOC_DATASET_DIR = 'dataset/voc'
-VOC_TRAIN_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/train.txt'
+VOC_TRAIN_ANNOTATION = 'train.txt'
-VOC_VAL_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/val.txt'
+VOC_VAL_ANNOTATION = 'val.txt'
-VOC_TEST_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/test.txt'
-VOC_IMAGE_DIR = None
+VOC_IMAGE_DIR = 'VOCdevkit/VOC_all/JPEGImages'
-VOC_USE_DEFAULT_LABEL = True
+VOC_USE_DEFAULT_LABEL = None

 @serializable
@@ -843,7 +842,7 @@ class SSDTestFeed(DataFeed):
     __doc__ = DataFeed.__doc__

     def __init__(self,
-                 dataset=SimpleDataSet(VOC_TEST_ANNOTATION).__dict__,
+                 dataset=SimpleDataSet(VOC_VAL_ANNOTATION).__dict__,
                  fields=['image', 'im_id', 'im_shape'],
                  image_shape=[3, 300, 300],
                  sample_transforms=[
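The net effect of the new `VOC_*` defaults is that annotation paths become bare file names joined onto `dataset_dir`, instead of embedding the old `VOCdevkit/VOC_all/...` prefix. A minimal sketch of that resolution (the helper name is hypothetical, for illustration only):

```python
import os.path as osp

VOC_DATASET_DIR = 'dataset/voc'
VOC_TRAIN_ANNOTATION = 'train.txt'

def resolve_annotation(dataset_dir, annotation):
    # Hypothetical helper: mirrors how the feeds join dataset_dir and
    # annotation after this commit (previously annotation already carried
    # the full VOCdevkit/VOC_all/ImageSets/Main/... prefix).
    return osp.join(dataset_dir, annotation)

print(resolve_annotation(VOC_DATASET_DIR, VOC_TRAIN_ANNOTATION))
# dataset/voc/train.txt
```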
ppdet/data/source/roidb_source.py  (+1 -1)

@@ -62,7 +62,7 @@ class RoiDbSource(Dataset):
         assert os.path.isfile(anno_file) or os.path.isdir(anno_file), \
             'anno_file {} is not a file or a directory'.format(anno_file)
         self._fname = anno_file
-        self._image_dir = image_dir
+        self._image_dir = image_dir if image_dir is not None else ''
         if image_dir is not None:
             assert os.path.isdir(image_dir), \
                 'image_dir {} is not a directory'.format(image_dir)
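Falling back to `''` keeps later `os.path.join(self._image_dir, ...)` calls valid when no image directory is configured, since joining with an empty prefix is a no-op:

```python
import os.path as osp

# joining with '' leaves a relative path untouched, which is why the
# constructor now substitutes '' for a missing image_dir
print(osp.join('', 'VOCdevkit/VOC2007/JPEGImages/001789.jpg'))
# -> VOCdevkit/VOC2007/JPEGImages/001789.jpg
```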
ppdet/data/source/voc_loader.py  (+13 -25)

@@ -26,8 +26,7 @@ def get_roidb(anno_path,
     Load VOC records with annotations in xml directory 'anno_path'

     Notes:
-    ${anno_path}/ImageSets/Main/train.txt must contains xml file names for annotations
-    ${anno_path}/Annotations/xxx.xml must contain annotation info for one record
+    ${anno_path} must contains xml file and image file path for annotations

     Args:
         anno_path (str): root directory for voc annotation data
@@ -53,11 +52,7 @@ def get_roidb(anno_path,
         'cname2id' is a dict to map category name to class id
     """
-    txt_file = anno_path
+    data_dir = os.path.dirname(anno_path)
-    part = txt_file.split('ImageSets')
-    xml_path = os.path.join(part[0], 'Annotations')
-    assert os.path.isfile(txt_file) and \
-            os.path.isdir(xml_path), 'invalid xml path'
     records = []
     ct = 0
@@ -67,17 +62,16 @@ def get_roidb(anno_path,
     # mapping category name to class id
     # background:0, first_class:1, second_class:2, ...
-    with open(txt_file, 'r') as fr:
+    with open(anno_path, 'r') as fr:
         while True:
             line = fr.readline()
             if not line:
                 break
-            fname = line.strip() + '.xml'
-            xml_file = os.path.join(xml_path, fname)
+            img_file, xml_file = [os.path.join(data_dir, x) \
+                    for x in line.strip().split()[:2]]
             if not os.path.isfile(xml_file):
                 continue
             tree = ET.parse(xml_file)
-            im_fname = tree.find('filename').text
             if tree.find('id') is None:
                 im_id = np.array([ct])
             else:
@@ -114,7 +108,7 @@ def get_roidb(anno_path,
                 is_crowd[i][0] = 0
                 difficult[i][0] = _difficult
             voc_rec = {
-                'im_file': im_fname,
+                'im_file': img_file,
                 'im_id': im_id,
                 'h': im_h,
                 'w': im_w,
@@ -144,8 +138,7 @@ def load(anno_path,
     xml directory 'anno_path'

     Notes:
-    ${anno_path}/ImageSets/Main/train.txt must contains xml file names for annotations
-    ${anno_path}/Annotations/xxx.xml must contain annotation info for one record
+    ${anno_path} must contains xml file and image file path for annotations

     Args:
         @anno_path (str): root directory for voc annotation data
@@ -171,11 +164,7 @@ def load(anno_path,
         'cname2id' is a dict to map category name to class id
     """
-    txt_file = anno_path
+    data_dir = os.path.dirname(anno_path)
-    part = txt_file.split('ImageSets')
-    xml_path = os.path.join(part[0], 'Annotations')
-    assert os.path.isfile(txt_file) and \
-            os.path.isdir(xml_path), 'invalid xml path'

     # mapping category name to class id
     # if with_background is True:
@@ -186,7 +175,7 @@ def load(anno_path,
     ct = 0
     cname2cid = {}
     if not use_default_label:
-        label_path = os.path.join(part[0], 'ImageSets/Main/label_list.txt')
+        label_path = os.path.join(data_dir, 'label_list.txt')
         with open(label_path, 'r') as fr:
             label_id = int(with_background)
             for line in fr.readlines():
@@ -195,17 +184,16 @@ def load(anno_path,
     else:
         cname2cid = pascalvoc_label(with_background)

-    with open(txt_file, 'r') as fr:
+    with open(anno_path, 'r') as fr:
         while True:
             line = fr.readline()
             if not line:
                 break
-            fname = line.strip() + '.xml'
-            xml_file = os.path.join(xml_path, fname)
+            img_file, xml_file = [os.path.join(data_dir, x) \
+                    for x in line.strip().split()[:2]]
             if not os.path.isfile(xml_file):
                 continue
             tree = ET.parse(xml_file)
-            im_fname = tree.find('filename').text
             if tree.find('id') is None:
                 im_id = np.array([ct])
             else:
@@ -235,7 +223,7 @@ def load(anno_path,
                 is_crowd[i][0] = 0
                 difficult[i][0] = _difficult
             voc_rec = {
-                'im_file': im_fname,
+                'im_file': img_file,
                 'im_id': im_id,
                 'h': im_h,
                 'w': im_w,
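The loader now expects each line of the list file to carry an image path and an xml path, both relative to the directory containing the list. A standalone sketch of that parsing step (the sample line is illustrative):

```python
import os

def parse_line(anno_path, line):
    # Mirrors the new voc_loader logic: take the first two whitespace-
    # separated fields and join them onto the directory containing the list.
    data_dir = os.path.dirname(anno_path)
    img_file, xml_file = [os.path.join(data_dir, x)
                          for x in line.strip().split()[:2]]
    return img_file, xml_file

line = ('VOCdevkit/VOC2007/JPEGImages/001789.jpg '
        'VOCdevkit/VOC2007/Annotations/001789.xml')
print(parse_line('dataset/voc/trainval.txt', line))
```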
ppdet/data/tools/labelme2coco.py → ppdet/data/tools/x2coco.py  (renamed, +55 -22)

@@ -44,7 +44,7 @@ def getbbox(self, points):
         return self.mask2box(mask)

-def images(data, num):
+def images_labelme(data, num):
     image = {}
     image['height'] = data['imageHeight']
     image['width'] = data['imageWidth']
@@ -52,6 +52,14 @@ def images(data, num):
     image['file_name'] = data['imagePath'].split('/')[-1]
     return image

+def images_cityscape(data, num, img_file):
+    image = {}
+    image['height'] = data['imgHeight']
+    image['width'] = data['imgWidth']
+    image['id'] = num + 1
+    image['file_name'] = img_file
+    return image
+

 def categories(label, labels_list):
     category = {}
@@ -112,7 +120,7 @@ def get_bbox(height, width, points):
     ]

-def deal_json(img_path, json_path):
+def deal_json(ds_type, img_path, json_path):
     data_coco = {}
     label_to_num = {}
     images_list = []
@@ -120,15 +128,21 @@ def deal_json(img_path, json_path):
     annotations_list = []
     labels_list = []
     image_num = -1
-    object_num = -1
     for img_file in os.listdir(img_path):
         img_label = img_file.split('.')[0]
+        if img_file.split('.')[-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
+            continue
         label_file = osp.join(json_path, img_label + '.json')
         print('Generating dataset from:', label_file)
         image_num = image_num + 1
         with open(label_file) as f:
             data = json.load(f)
-            images_list.append(images(data, image_num))
+            if ds_type == 'labelme':
+                object_num = -1
+                images_list.append(images_labelme(data, image_num))
+            elif ds_type == 'cityscape':
+                images_list.append(images_cityscape(data, image_num, img_file))
+        if ds_type == 'labelme':
             for shapes in data['shapes']:
                 object_num = object_num + 1
                 label = shapes['label']
@@ -148,6 +162,18 @@ def deal_json(img_path, json_path):
                     points.append([points[1][0], points[0][1]])
                     annotations_list.append(
                         annotations_rectangle(points, label, image_num,
                                               object_num, label_to_num))
+        elif ds_type == 'cityscape':
+            for shapes in data['objects']:
+                object_num = object_num + 1
+                label = shapes['label']
+                if label not in labels_list:
+                    categories_list.append(categories(label, labels_list))
+                    labels_list.append(label)
+                    label_to_num[label] = len(labels_list)
+                points = shapes['polygon']
+                annotations_list.append(
+                    annotations_polygon(data['imgHeight'], data['imgWidth'],
+                                        points, label, image_num, object_num,
+                                        label_to_num))
     data_coco['images'] = images_list
     data_coco['categories'] = categories_list
     data_coco['annotations'] = annotations_list
@@ -157,6 +183,7 @@ def deal_json(img_path, json_path):

 def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--dataset_type', help='the type of dataset')
     parser.add_argument('--json_input_dir', help='input annotated directory')
     parser.add_argument('--image_input_dir', help='image directory')
     parser.add_argument(
@@ -177,6 +204,11 @@ def main():
         type=float,
         default=0.0)
     args = parser.parse_args()
+    try:
+        assert args.dataset_type in ['labelme', 'cityscape']
+    except AssertionError as e:
+        print('Now only support the cityscape dataset and labelme dataset!!')
+        os._exit(0)
     try:
         assert os.path.exists(args.json_input_dir)
     except AssertionError as e:
@@ -234,7 +266,8 @@ def main():
     if not os.path.exists(args.output_dir + '/annotations'):
         os.makedirs(args.output_dir + '/annotations')
     if args.train_proportion != 0:
-        train_data_coco = deal_json(args.output_dir + '/train',
+        train_data_coco = deal_json(args.dataset_type,
+                                    args.output_dir + '/train',
                                     args.json_input_dir)
         train_json_path = osp.join(args.output_dir + '/annotations',
                                    'instance_train.json')
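To make the rename concrete, a reduced sketch of the new per-dataset dispatch (records trimmed to the size fields; the wrapper is an illustration, not the full tool):

```python
def images_labelme(data, num):
    # labelme JSON stores the size as imageHeight/imageWidth and carries
    # the image path inside the annotation itself
    return {'height': data['imageHeight'], 'width': data['imageWidth'],
            'id': num + 1, 'file_name': data['imagePath'].split('/')[-1]}

def images_cityscape(data, num, img_file):
    # cityscape polygon JSON stores imgHeight/imgWidth but no image path,
    # so the caller passes the image file name through
    return {'height': data['imgHeight'], 'width': data['imgWidth'],
            'id': num + 1, 'file_name': img_file}

def image_record(ds_type, data, num, img_file):
    # hypothetical wrapper, mirroring the branch added to deal_json
    if ds_type == 'labelme':
        return images_labelme(data, num)
    if ds_type == 'cityscape':
        return images_cityscape(data, num, img_file)
    raise ValueError("only 'labelme' and 'cityscape' are supported")

print(image_record('cityscape', {'imgHeight': 1024, 'imgWidth': 2048},
                   0, 'sample_leftImg8bit.png'))  # illustrative file name
```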
ppdet/utils/download.py  (+34 -32)

@@ -25,7 +25,7 @@ import hashlib
 import tarfile
 import zipfile

-from .voc_utils import merge_and_create_list
+from .voc_utils import create_list

 import logging
 logger = logging.getLogger(__name__)
@@ -59,7 +59,7 @@ DATASETS = {
         (
             'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
             'b6e924de25625d8de591ea690078ad9f', ),
-    ], ["VOCdevkit/VOC_all"]),
+    ], ["VOCdevkit/VOC2012", "VOCdevkit/VOC2007"]),
     'wider_face': ([
         (
             'https://dataset.bj.bcebos.com/wider_face/WIDER_train.zip',
@@ -85,7 +85,8 @@ def get_weights_path(url):
     """Get weights path from WEIGHT_HOME, if not exists,
     download it from url.
     """
-    return get_path(url, WEIGHTS_HOME)
+    path, _ = get_path(url, WEIGHTS_HOME)
+    return path

 def get_dataset_path(path, annotation, image_dir):
@@ -107,19 +108,26 @@ def get_dataset_path(path, annotation, image_dir):
                     "{}".format(path, name))
         data_dir = osp.join(DATASET_HOME, name)

-        # For voc, only check merged dir VOC_all
+        # For voc, only check dir VOCdevkit/VOC2012, VOCdevkit/VOC2007
         if name == 'voc':
-            check_dir = osp.join(data_dir, dataset[1][0])
-            if osp.exists(check_dir):
-                logger.info("Found {}".format(check_dir))
+            exists = True
+            for sub_dir in dataset[1]:
+                check_dir = osp.join(data_dir, sub_dir)
+                if osp.exists(check_dir):
+                    logger.info("Found {}".format(check_dir))
+                else:
+                    exists = False
+            if exists:
                 return data_dir

+        # voc exist is checked above, voc is not exist here
+        check_exist = name != 'voc'
         for url, md5sum in dataset[0]:
-            get_path(url, data_dir, md5sum)
+            get_path(url, data_dir, md5sum, check_exist)

-        # voc should merge dir and create list after download
+        # voc should create list after download
         if name == 'voc':
-            _merge_voc_dir(data_dir, dataset[1][0])
+            create_voc_list(data_dir)

         return data_dir

     # not match any dataset in DATASETS
@@ -129,26 +137,17 @@ def get_dataset_path(path, annotation, image_dir):
                         osp.split(path)[-1]))

-def _merge_voc_dir(data_dir, output_subdir):
+def create_voc_list(data_dir, devkit_subdir='VOCdevkit'):
-    logger.info("Download voc dataset successed, merge "
-                "VOC2007 and VOC2012 to VOC_all...")
+    logger.info("Create voc file list...")
-    output_dir = osp.join(data_dir, output_subdir)
-    devkit_dir = "/".join(output_dir.split('/')[:-1])
+    devkit_dir = osp.join(data_dir, devkit_subdir)
     years = ['2007', '2012']
-    # merge dir in output_tmp_dir at first, move to
-    # output_dir after merge sucessed.
-    output_tmp_dir = osp.join(data_dir, 'tmp')
-    if osp.isdir(output_tmp_dir):
-        shutil.rmtree(output_tmp_dir)
     # NOTE: since using auto download VOC
     # dataset, VOC default label list should be used,
     # do not generate label_list.txt here. For default
     # label, see ../data/source/voc_loader.py
-    merge_and_create_list(devkit_dir, years, output_tmp_dir)
-    shutil.move(output_tmp_dir, output_dir)
-    # remove source directory VOC2007 and VOC2012
-    shutil.rmtree(osp.join(devkit_dir, "VOC2007"))
-    shutil.rmtree(osp.join(devkit_dir, "VOC2012"))
+    create_list(devkit_dir, years, data_dir)
+    logger.info("Create voc file list finished")

 def map_path(url, root_dir):
@@ -161,7 +160,7 @@ def map_path(url, root_dir):
     return osp.join(root_dir, fpath)

-def get_path(url, root_dir, md5sum=None):
+def get_path(url, root_dir, md5sum=None, check_exist=True):
     """ Download from given url to root_dir.
     if file or directory specified by url is exists under
     root_dir, return the path directly, otherwise download
@@ -178,20 +177,25 @@ def get_path(url, root_dir, md5sum=None):
     # For same zip file, decompressed directory name different
     # from zip file name, rename by following map
     decompress_name_map = {
-        "VOC": "VOCdevkit/VOC_all",
+        "VOCtrainval_11-May-2012": "VOCdevkit/VOC2012",
+        "VOCtrainval_06-Nov-2007": "VOCdevkit/VOC2007",
+        "VOCtest_06-Nov-2007": "VOCdevkit/VOC2007",
         "annotations_trainval": "annotations"
     }
     for k, v in decompress_name_map.items():
         if fullpath.find(k) >= 0:
             fullpath = '/'.join(fullpath.split('/')[:-1] + [v])

-    if osp.exists(fullpath):
+    exist_flag = False
+    if osp.exists(fullpath) and check_exist:
+        exist_flag = True
         logger.info("Found {}".format(fullpath))
     else:
+        exist_flag = False
         fullname = _download(url, root_dir, md5sum)
         _decompress(fullname)

-    return fullpath
+    return fullpath, exist_flag

 def download_dataset(path, dataset=None):
@@ -201,9 +205,7 @@ def download_dataset(path, dataset=None):
         return
     dataset_info = DATASETS[dataset][0]
     for info in dataset_info:
-        get_path(info[0], path, info[1])
+        get_path(info[0], path, info[1], False)
-    if dataset == 'voc':
-        _merge_voc_dir(path, DATASETS[dataset][1][0])
     logger.info("Download dataset {} finished.".format(dataset))
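Because `get_path` now returns a `(path, exist_flag)` pair, callers that only need the path unpack it, as `get_weights_path` does above. A hedged stand-in showing the new calling convention (the body is illustrative, not the real downloader):

```python
import os.path as osp

def get_path_stub(url, root_dir, md5sum=None, check_exist=True):
    # Illustrative stand-in for ppdet.utils.download.get_path after this
    # commit: it reports whether the target already existed instead of
    # downloading unconditionally.
    fullpath = osp.join(root_dir, url.split('/')[-1])
    exist_flag = osp.exists(fullpath) and check_exist
    if not exist_flag:
        pass  # real code would download and decompress here
    return fullpath, exist_flag

path, already_there = get_path_stub(
    'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
    '/tmp/datasets')
print(path, already_there)
```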
ppdet/utils/voc_utils.py  (+17 -30)

@@ -22,20 +22,15 @@ import re
 import random
 import shutil

-__all__ = ['merge_and_create_list']
+__all__ = ['create_list']

-def merge_and_create_list(devkit_dir, years, output_dir):
+def create_list(devkit_dir, years, output_dir):
     """
-    Merge VOC2007 and VOC2012 to output_dir and create following list:
+    create following list:
-    1. train.txt
+    1. trainval.txt
-    2. val.txt
+    2. test.txt
-    3. test.txt
     """
-    os.makedirs(osp.join(output_dir, 'Annotations/'))
-    os.makedirs(osp.join(output_dir, 'ImageSets/Main/'))
-    os.makedirs(osp.join(output_dir, 'JPEGImages/'))
     trainval_list = []
     test_list = []
     for year in years:
@@ -43,20 +38,16 @@ def merge_and_create_list(devkit_dir, years, output_dir):
         trainval_list.extend(trainval)
         test_list.extend(test)

-    main_dir = osp.join(output_dir, 'ImageSets/Main/')
     random.shuffle(trainval_list)
-    with open(osp.join(main_dir, 'train.txt'), 'w') as ftrainval:
+    with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
         for item in trainval_list:
-            ftrainval.write(item + '\n')
+            ftrainval.write(item[0] + ' ' + item[1] + '\n')

-    with open(osp.join(main_dir, 'val.txt'), 'w') as fval:
+    with open(osp.join(output_dir, 'test.txt'), 'w') as fval:
-        with open(osp.join(main_dir, 'test.txt'), 'w') as ftest:
-            ct = 0
-            for item in test_list:
-                ct += 1
-                fval.write(item + '\n')
-                if ct <= 1000:
-                    ftest.write(item + '\n')
+        ct = 0
+        for item in test_list:
+            ct += 1
+            fval.write(item[0] + ' ' + item[1] + '\n')

 def _get_voc_dir(devkit_dir, year, type):
@@ -86,14 +77,10 @@ def _walk_voc_dir(devkit_dir, year, output_dir):
         if name_prefix in added:
             continue
         added.add(name_prefix)
-        ann_path = osp.join(annotation_dir, name_prefix + '.xml')
+        ann_path = osp.join(osp.relpath(annotation_dir, output_dir),
+                            name_prefix + '.xml')
-        img_path = osp.join(img_dir, name_prefix + '.jpg')
-        new_ann_path = osp.join(output_dir, 'Annotations/',
-                                name_prefix + '.xml')
-        new_img_path = osp.join(output_dir, 'JPEGImages/',
+        img_path = osp.join(osp.relpath(img_dir, output_dir),
                             name_prefix + '.jpg')
-        shutil.copy(ann_path, new_ann_path)
+        img_ann_list.append((img_path, ann_path))
-        shutil.copy(img_path, new_img_path)
-        img_ann_list.append(name_prefix)

     return trainval_list, test_list
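
Since `_walk_voc_dir` now collects `(img_path, ann_path)` tuples rather than bare name prefixes, the writers in `create_list` emit two space-separated fields per line — exactly the format `voc_loader.py` splits on. A minimal sketch of that output (sample paths are illustrative):

```python
# Write a few (image, annotation) pairs the way create_list now does.
pairs = [
    ('VOCdevkit/VOC2007/JPEGImages/001789.jpg',
     'VOCdevkit/VOC2007/Annotations/001789.xml'),
    ('VOCdevkit/VOC2012/JPEGImages/003876.jpg',
     'VOCdevkit/VOC2012/Annotations/003876.xml'),
]
with open('trainval.txt', 'w') as ftrainval:
    for item in pairs:
        ftrainval.write(item[0] + ' ' + item[1] + '\n')
```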