Commit 23d3745d authored by: R root

Merge branch 'ppdet_split' of /paddle/work/paddle-fork/models into init_ppdet

@@ -233,6 +233,24 @@ MaskRCNNTestFeed:
   batch_size: 1
   dataset:
     annotation: dataset/coco/annotations/instances_val2017.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
......
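For reference, the added `sample_transforms` amount to Caffe-style mean subtraction on a BGR image followed by an HWC-to-CHW transpose. A minimal NumPy sketch of the same computation (not the ppdet implementation; `img` is assumed to be the decoded HWC uint8 array):

```python
import numpy as np

def apply_test_transforms(img):
    # NormalizeImage: is_scale=False keeps pixels in [0, 255]; subtract the
    # per-channel BGR means from the config, divide by std (all 1.0 here).
    mean = np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)
    std = np.array([1.0, 1.0, 1.0], dtype=np.float32)
    img = (img.astype(np.float32) - mean) / std
    # Permute: channel_first=true turns HWC into CHW; to_bgr=false keeps
    # the channel order produced by DecodeImage (to_rgb: False, i.e. BGR).
    return img.transpose((2, 0, 1))
```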
@@ -249,6 +249,23 @@ MaskRCNNTestFeed:
   batch_size: 1
   dataset:
     annotation: dataset/coco/annotations/instances_val2017.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
......
@@ -192,6 +192,28 @@ FasterRCNNEvalFeed:
     dataset_dir: dataset/objects365
     annotation: annotations/val.json
     image_dir: val
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !ResizeImage
+    target_size: 800
+    max_size: 1333
+    interp: 1
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
@@ -200,6 +222,24 @@ FasterRCNNTestFeed:
   batch_size: 1
   dataset:
     annotation: dataset/obj365/annotations/val.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: False
+    with_mixup: False
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: False
+    mean:
+    - 102.9801
+    - 115.9465
+    - 122.7717
+    std:
+    - 1.0
+    - 1.0
+    - 1.0
+  - !Permute
+    channel_first: true
+    to_bgr: false
   batch_transforms:
   - !PadBatch
     pad_to_stride: 32
......
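`PadBatch` with `pad_to_stride: 32` zero-pads each image so the whole batch shares one height and width that are multiples of the FPN stride. A rough NumPy sketch of that behavior (not the ppdet implementation), assuming CHW float arrays:

```python
import numpy as np

def pad_batch(images, stride=32):
    # Pad every CHW image to the batch-wide max H/W, rounded up to `stride`.
    max_h = max(img.shape[1] for img in images)
    max_w = max(img.shape[2] for img in images)
    pad_h = ((max_h + stride - 1) // stride) * stride
    pad_w = ((max_w + stride - 1) // stride) * stride
    out = np.zeros((len(images), images[0].shape[0], pad_h, pad_w),
                   dtype=images[0].dtype)
    for i, img in enumerate(images):
        out[i, :, :img.shape[1], :img.shape[2]] = img  # top-left anchored
    return out
```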
@@ -61,8 +61,7 @@ SSDTrainFeed:
   use_process: true
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
   use_default_label: true

 SSDEvalFeed:
@@ -70,8 +69,7 @@ SSDEvalFeed:
   use_process: true
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
   use_default_label: true
   drop_last: false
......
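After this change, `annotation` names a flat list file directly under `dataset_dir`, and `image_dir` is dropped: each line of the list pairs an image path with its xml annotation, both relative to `dataset_dir`. Two hypothetical lines from the generated `trainval.txt` (the image IDs are illustrative):

```
VOCdevkit/VOC2007/JPEGImages/000005.jpg VOCdevkit/VOC2007/Annotations/000005.xml
VOCdevkit/VOC2012/JPEGImages/2008_000002.jpg VOCdevkit/VOC2012/Annotations/2008_000002.xml
```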
@@ -64,8 +64,7 @@ SSDTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
   use_default_label: true
   image_shape: [3, 300, 300]
   sample_transforms:
@@ -109,8 +108,7 @@ SSDEvalFeed:
   batch_size: 32
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
   use_default_label: true
   drop_last: false
   image_shape: [3, 300, 300]
......
@@ -68,8 +68,7 @@ SSDTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
   use_default_label: true
   image_shape: [3, 512, 512]
   sample_transforms:
@@ -113,8 +112,7 @@ SSDEvalFeed:
   batch_size: 32
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
   use_default_label: true
   drop_last: false
   image_shape: [3, 512, 512]
......
@@ -62,8 +62,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
   use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -75,8 +74,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
   use_default_label: true

 YoloTestFeed:
......
@@ -64,8 +64,7 @@ YoloTrainFeed:
   batch_size: 1
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/train.txt
-    image_dir: ./JPEGImages
+    annotation: train.txt
   use_default_label: false
   num_workers: 16
   bufsize: 128
@@ -111,8 +110,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/val.txt
-    image_dir: ./JPEGImages
+    annotation: val.txt
   use_default_label: false
@@ -121,5 +119,4 @@ YoloTestFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/label_list.txt
   use_default_label: false
@@ -63,8 +63,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
   use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -76,8 +75,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
   use_default_label: true

 YoloTestFeed:
......
@@ -65,8 +65,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
   use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -78,8 +77,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
   use_default_label: true

 YoloTestFeed:
......
New file `dataset/voc/create_list.py`:

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os.path as osp
import logging
from ppdet.utils.download import create_voc_list
logging.basicConfig(level=logging.INFO)
voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
create_voc_list(voc_path)
New file: the default Pascal VOC label list (20 classes, one per line):

aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
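For reference, `voc_loader.pascalvoc_label` turns this list into the default name-to-id mapping; the loader's own comment documents the convention (background:0, first_class:1, second_class:2, ...). A minimal sketch of that mapping:

```python
VOC_LABELS = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
    'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

def pascalvoc_label(with_background=True):
    # With a background class, ids start at 1 (background takes 0);
    # without it, class ids start at 0. Mirrors the loader's convention.
    offset = int(with_background)
    return {name: i + offset for i, name in enumerate(VOC_LABELS)}

assert pascalvoc_label()['aeroplane'] == 1
```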
@@ -27,6 +27,7 @@
 Parses various data sources and creates `data.Dataset` instances. Currently,
 following data sources are supported:

 - COCO data source
   Loads `COCO` type datasets with directory structures like this:
   ```
@@ -36,46 +37,54 @@ Loads `COCO` type datasets with directory structures like this:
   │   ├── instances_train2017.json
   │   ├── instances_val2014.json
   │   ├── instances_val2017.json
   |   ...
   ├── train2017
   │   ├── 000000000009.jpg
   │   ├── 000000580008.jpg
   |   ...
   ├── val2017
   │   ├── 000000000139.jpg
   │   ├── 000000000285.jpg
+  │   |   ...
   |   ...
   ```

 - Pascal VOC data source
   Loads `Pascal VOC` like datasets with directory structure like this:
   ```
-  data/pascalvoc/
-  ├──Annotations
-  │   ├── i000050.jpg
-  │   ├── 003876.xml
-  |   ...
-  ├── ImageSets
-  │   ├──Main
-  └── train.txt
-  └── val.txt
-  └── test.txt
-  └── dog_train.txt
-  └── dog_trainval.txt
-  └── dog_val.txt
-  └── dog_test.txt
-  └── ...
-  │   ├──Layout
-  └──...
-  │   ├── Segmentation
-  └──...
-  ├── JPEGImages
-  │   ├── 000050.jpg
-  │   ├── 003876.jpg
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 001789.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 003876.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
   |   ...
   ```

+  **NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt`
+  of the Pascal VOC dataset will be read; otherwise `label_list.txt` is unnecessary and
+  the default Pascal VOC label list defined in
+  [voc\_loader.py](../ppdet/data/source/voc_loader.py) will be used.

 - Roidb data source
   A generalized data source serialized as pickle files, which have the following
   structure:
@@ -181,16 +190,18 @@ whole data pipeline is fully customizable through the yaml configuration files.

 #### Custom Datasets

-- Option 1: Convert the dataset to COCO or VOC format.
+- Option 1: Convert the dataset to COCO format.

   ```sh
-  # a small utility (`tools/labelme2coco.py`) is provided to convert
-  # Labelme-annotated dataset to COCO format.
-  python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+  # a small utility (`tools/x2coco.py`) is provided to convert
+  # Labelme-annotated datasets or Cityscapes datasets to COCO format.
+  python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                      --json_input_dir ./labelme_annos/
                                       --image_input_dir ./labelme_imgs/
                                       --output_dir ./cocome/
                                       --train_proportion 0.8
                                       --val_proportion 0.2
                                       --test_proportion 0.0
+  # --dataset_type: the dataset format to convert from; currently 'labelme' and 'cityscape' are supported
   # --json_input_dir: the path of the json files annotated by Labelme.
   # --image_input_dir: the path of the images.
   # --output_dir: the path of the converted COCO dataset.
......
@@ -11,9 +11,12 @@
 Sub-features:

 1. Data parsing

 Data parsing produces a `data.Dataset`; the implementation lives in `data.source`. It can parse datasets of different formats, and the supported data sources include:

 - COCO data source

-  This dataset currently comes as COCO2012 and COCO2017, consisting mainly of json files and image files, organized as follows:
+  This dataset currently comes as COCO2014 and COCO2017, consisting mainly of json files and image files, organized as follows:

   ```
   dataset/coco/
@@ -22,49 +25,53 @@
   │   ├── instances_train2017.json
   │   ├── instances_val2014.json
   │   ├── instances_val2017.json
   |   ...
   ├── train2017
   │   ├── 000000000009.jpg
   │   ├── 000000580008.jpg
   |   ...
   ├── val2017
   │   ├── 000000000139.jpg
   │   ├── 000000000285.jpg
+  │   |   ...
   |   ...
   ```

 - Pascal VOC data source

   This dataset currently comes as VOC2007 and VOC2012, consisting mainly of xml files and image files, organized as follows:

   ```
-  data/pascalvoc/
-  ├──Annotations
-  │   ├── i000050.jpg
-  │   ├── 003876.xml
-  |   ...
-  ├── ImageSets
-  │   ├──Main
-  └── train.txt
-  └── val.txt
-  └── test.txt
-  └── dog_train.txt
-  └── dog_trainval.txt
-  └── dog_val.txt
-  └── dog_test.txt
-  └── ...
-  │   ├──Layout
-  └──...
-  │   ├── Segmentation
-  └──...
-  ├── JPEGImages
-  │   ├── 000050.jpg
-  │   ├── 003876.jpg
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 001789.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 003876.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
   |   ...
   ```

+  **Note:** If you set `use_default_label=False` in the yaml config, the class list
+  will be read from `label_list.txt`; otherwise `label_list.txt` may be omitted, and
+  the library falls back to the default Pascal VOC class list defined in
+  [voc\_loader.py](../ppdet/data/source/voc_loader.py).

 - Roidb data source

   This data source consists of pickle files converted from the COCO and Pascal VOC datasets; each contains a dict holding a single list named 'records' (and possibly a dict named 'cname2cid'), with contents as follows:
@@ -165,15 +172,17 @@ coco = Reader(ccfg.DATA, ccfg.TRANSFORM, maxiter=-1)
 ```

 #### How to use a custom dataset?

-- Option 1: convert the dataset to VOC or COCO format.
+- Option 1: convert the dataset to COCO format.

   ```
-  # ./tools/ provides labelme2coco.py for converting Labelme-annotated datasets to COCO format
-  python ./ppdet/data/tools/labelme2coco.py --json_input_dir ./labelme_annos/
+  # ./tools/ provides x2coco.py for converting Labelme-annotated datasets or Cityscapes datasets to COCO format
+  python ./ppdet/data/tools/x2coco.py --dataset_type labelme
+                                      --json_input_dir ./labelme_annos/
                                       --image_input_dir ./labelme_imgs/
                                       --output_dir ./cocome/
                                       --train_proportion 0.8
                                       --val_proportion 0.2
                                       --test_proportion 0.0
+  # --dataset_type: the dataset format to convert from; currently 'labelme' and 'cityscape' are supported
   # --json_input_dir: directory of the Labelme-annotated json files
   # --image_input_dir: directory of the image files
   # --output_dir: where the converted COCO-format dataset is stored
......
@@ -111,6 +111,13 @@
 ln -sf <path/to/coco> <path/to/paddle_detection>/dataset/coco
 ln -sf <path/to/voc> <path/to/paddle_detection>/dataset/voc
 ```

+For the Pascal VOC dataset, you should create the file lists by:
+
+```
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/create_list.py
+```

 **Download datasets manually:**

 On the other hand, to download the datasets, run the following commands:
@@ -122,13 +129,69 @@
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/coco/download_coco.py
 ```

+`COCO` dataset with directory structures like this:
+
+  ```
+  dataset/coco/
+  ├── annotations
+  │   ├── instances_train2014.json
+  │   ├── instances_train2017.json
+  │   ├── instances_val2014.json
+  │   ├── instances_val2017.json
+  │   |   ...
+  ├── train2017
+  │   ├── 000000000009.jpg
+  │   ├── 000000580008.jpg
+  │   |   ...
+  ├── val2017
+  │   ├── 000000000139.jpg
+  │   ├── 000000000285.jpg
+  │   |   ...
+  |   ...
+  ```

 - Pascal VOC

 ```
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/voc/download_voc.py
+python dataset/voc/create_list.py
 ```

+`Pascal VOC` dataset with directory structure like this:
+
+  ```
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 001789.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages
+  │       ├── 003876.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  |   ...
+  ```
+
+**NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt`
+of the Pascal VOC dataset will be read; otherwise `label_list.txt` is unnecessary and
+the default Pascal VOC label list defined in
+[voc\_loader.py](../ppdet/data/source/voc_loader.py) will be used.

 **Download datasets automatically:**

 If a training session is started but the dataset is not set up properly (e.g,
......
@@ -108,6 +108,13 @@
 ln -sf <path/to/coco> <path/to/paddle_detection>/dataset/coco
 ln -sf <path/to/voc> <path/to/paddle_detection>/dataset/voc
 ```

+For the Pascal VOC dataset, create the file lists with:
+
+```
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/create_list.py
+```

 **Download datasets manually:**

 If the datasets are not available locally, download them with the following commands:
@@ -119,13 +126,68 @@
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/coco/download_coco.py
 ```

+The `COCO` dataset directory structure is as follows:
+
+```
+dataset/coco/
+├── annotations
+│   ├── instances_train2014.json
+│   ├── instances_train2017.json
+│   ├── instances_val2014.json
+│   ├── instances_val2017.json
+│   |   ...
+├── train2017
+│   ├── 000000000009.jpg
+│   ├── 000000580008.jpg
+│   |   ...
+├── val2017
+│   ├── 000000000139.jpg
+│   ├── 000000000285.jpg
+│   |   ...
+|   ...
+```

 - Pascal VOC

 ```
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/voc/download_voc.py
+python dataset/voc/create_list.py
 ```

+The `Pascal VOC` dataset directory structure is as follows:
+
+```
+dataset/voc/
+├── train.txt
+├── val.txt
+├── test.txt
+├── label_list.txt (optional)
+├── VOCdevkit/VOC2007
+│   ├── Annotations
+│       ├── 001789.xml
+│       |   ...
+│   ├── JPEGImages
+│       ├── 001789.jpg
+│       |   ...
+│   ├── ImageSets
+│       |   ...
+├── VOCdevkit/VOC2012
+│   ├── Annotations
+│       ├── 003876.xml
+│       |   ...
+│   ├── JPEGImages
+│       ├── 003876.jpg
+│       |   ...
+│   ├── ImageSets
+│       |   ...
+|   ...
+```
+
+**Note:** If you set `use_default_label=False` in the yaml config, the class list
+will be read from `label_list.txt`; otherwise `label_list.txt` may be omitted, and
+the library falls back to the default Pascal VOC class list defined in
+[voc\_loader.py](../ppdet/data/source/voc_loader.py).

 **Download datasets automatically:**

 If you start a run before the datasets are set up properly (e.g., they cannot be found under `dataset/coco` or `dataset/voc`),
......
@@ -219,7 +219,7 @@ class DataSet(object):
     def __init__(self,
                  annotation,
-                 image_dir,
+                 image_dir=None,
                  dataset_dir=None,
                  use_default_label=None):
         super(DataSet, self).__init__()
@@ -229,7 +229,7 @@ class DataSet(object):
         self.use_default_label = use_default_label

-COCO_DATASET_DIR = 'coco'
+COCO_DATASET_DIR = 'dataset/coco'
 COCO_TRAIN_ANNOTATION = 'annotations/instances_train2017.json'
 COCO_TRAIN_IMAGE_DIR = 'train2017'
 COCO_VAL_ANNOTATION = 'annotations/instances_val2017.json'
@@ -246,12 +246,11 @@ class CocoDataSet(DataSet):
             dataset_dir=dataset_dir, annotation=annotation, image_dir=image_dir)

-VOC_DATASET_DIR = 'pascalvoc'
-VOC_TRAIN_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/train.txt'
-VOC_VAL_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/val.txt'
-VOC_TEST_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/test.txt'
-VOC_IMAGE_DIR = 'VOCdevkit/VOC_all/JPEGImages'
-VOC_USE_DEFAULT_LABEL = None
+VOC_DATASET_DIR = 'dataset/voc'
+VOC_TRAIN_ANNOTATION = 'trainval.txt'
+VOC_VAL_ANNOTATION = 'val.txt'
+VOC_IMAGE_DIR = None
+VOC_USE_DEFAULT_LABEL = True

 @serializable
@@ -843,7 +842,7 @@ class SSDTestFeed(DataFeed):
     __doc__ = DataFeed.__doc__

     def __init__(self,
-                 dataset=SimpleDataSet(VOC_TEST_ANNOTATION).__dict__,
+                 dataset=SimpleDataSet(VOC_VAL_ANNOTATION).__dict__,
                  fields=['image', 'im_id', 'im_shape'],
                  image_shape=[3, 300, 300],
                  sample_transforms=[
......
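With `image_dir` optional and the new VOC defaults, a VOC dataset can be declared without an image directory, since each line of the list file carries the image path itself. A hypothetical construction using the `DataSet` class from this diff:

```python
# Hypothetical usage; the values mirror the new VOC_* defaults above.
voc_train = DataSet(
    annotation='trainval.txt',   # VOC_TRAIN_ANNOTATION
    dataset_dir='dataset/voc',   # VOC_DATASET_DIR
    use_default_label=True,      # VOC_USE_DEFAULT_LABEL
)                                # image_dir defaults to None
```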
@@ -62,7 +62,7 @@ class RoiDbSource(Dataset):
         assert os.path.isfile(anno_file) or os.path.isdir(anno_file), \
             'anno_file {} is not a file or a directory'.format(anno_file)
         self._fname = anno_file
-        self._image_dir = image_dir
+        self._image_dir = image_dir if image_dir is not None else ''
         if image_dir is not None:
             assert os.path.isdir(image_dir), \
                 'image_dir {} is not a directory'.format(image_dir)
......
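Defaulting `self._image_dir` to `''` instead of `None` keeps downstream path joins valid when the annotation list already carries image paths, because joining with an empty prefix is a no-op:

```python
import os

# Joining with an empty directory leaves a relative path unchanged.
assert os.path.join('', 'VOCdevkit/VOC2007/JPEGImages/000005.jpg') == \
    'VOCdevkit/VOC2007/JPEGImages/000005.jpg'
```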
@@ -26,8 +26,7 @@ def get_roidb(anno_path,
     Load VOC records with annotations in xml directory 'anno_path'

     Notes:
-    ${anno_path}/ImageSets/Main/train.txt must contains xml file names for annotations
-    ${anno_path}/Annotations/xxx.xml must contain annotation info for one record
+    ${anno_path} must contain image file and xml file paths for annotations

     Args:
         anno_path (str): root directory for voc annotation data
@@ -53,11 +52,7 @@
         'cname2id' is a dict to map category name to class id
     """

-    txt_file = anno_path
-    part = txt_file.split('ImageSets')
-    xml_path = os.path.join(part[0], 'Annotations')
-    assert os.path.isfile(txt_file) and \
-        os.path.isdir(xml_path), 'invalid xml path'
+    data_dir = os.path.dirname(anno_path)

     records = []
     ct = 0
@@ -67,17 +62,16 @@
     # mapping category name to class id
     # background:0, first_class:1, second_class:2, ...
-    with open(txt_file, 'r') as fr:
+    with open(anno_path, 'r') as fr:
         while True:
             line = fr.readline()
             if not line:
                 break
-            fname = line.strip() + '.xml'
-            xml_file = os.path.join(xml_path, fname)
+            img_file, xml_file = [os.path.join(data_dir, x) \
+                    for x in line.strip().split()[:2]]
             if not os.path.isfile(xml_file):
                 continue
             tree = ET.parse(xml_file)
-            im_fname = tree.find('filename').text
             if tree.find('id') is None:
                 im_id = np.array([ct])
             else:
@@ -114,7 +108,7 @@
                 is_crowd[i][0] = 0
                 difficult[i][0] = _difficult
             voc_rec = {
-                'im_file': im_fname,
+                'im_file': img_file,
                 'im_id': im_id,
                 'h': im_h,
                 'w': im_w,
@@ -144,8 +138,7 @@ def load(anno_path,
     xml directory 'anno_path'

     Notes:
-    ${anno_path}/ImageSets/Main/train.txt must contains xml file names for annotations
-    ${anno_path}/Annotations/xxx.xml must contain annotation info for one record
+    ${anno_path} must contain image file and xml file paths for annotations

     Args:
         @anno_path (str): root directory for voc annotation data
@@ -171,11 +164,7 @@
         'cname2id' is a dict to map category name to class id
     """

-    txt_file = anno_path
-    part = txt_file.split('ImageSets')
-    xml_path = os.path.join(part[0], 'Annotations')
-    assert os.path.isfile(txt_file) and \
-        os.path.isdir(xml_path), 'invalid xml path'
+    data_dir = os.path.dirname(anno_path)

     # mapping category name to class id
     # if with_background is True:
@@ -186,7 +175,7 @@
     ct = 0
     cname2cid = {}
     if not use_default_label:
-        label_path = os.path.join(part[0], 'ImageSets/Main/label_list.txt')
+        label_path = os.path.join(data_dir, 'label_list.txt')
         with open(label_path, 'r') as fr:
             label_id = int(with_background)
             for line in fr.readlines():
@@ -195,17 +184,16 @@
     else:
         cname2cid = pascalvoc_label(with_background)

-    with open(txt_file, 'r') as fr:
+    with open(anno_path, 'r') as fr:
         while True:
             line = fr.readline()
             if not line:
                 break
-            fname = line.strip() + '.xml'
-            xml_file = os.path.join(xml_path, fname)
+            img_file, xml_file = [os.path.join(data_dir, x) \
+                    for x in line.strip().split()[:2]]
             if not os.path.isfile(xml_file):
                 continue
             tree = ET.parse(xml_file)
-            im_fname = tree.find('filename').text
             if tree.find('id') is None:
                 im_id = np.array([ct])
             else:
@@ -235,7 +223,7 @@
                 is_crowd[i][0] = 0
                 difficult[i][0] = _difficult
             voc_rec = {
-                'im_file': im_fname,
+                'im_file': img_file,
                 'im_id': im_id,
                 'h': im_h,
                 'w': im_w,
......
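Both loaders now resolve each list line against the directory containing the list file. A self-contained sketch of the new parsing convention, lifted from the diff:

```python
import os

def parse_list_line(anno_path, line):
    """Resolve one 'img_path xml_path' line relative to the list file's dir."""
    data_dir = os.path.dirname(anno_path)
    img_file, xml_file = [os.path.join(data_dir, x)
                          for x in line.strip().split()[:2]]
    return img_file, xml_file

img, xml = parse_list_line(
    'dataset/voc/trainval.txt',
    'VOCdevkit/VOC2007/JPEGImages/000005.jpg '
    'VOCdevkit/VOC2007/Annotations/000005.xml')
# img == 'dataset/voc/VOCdevkit/VOC2007/JPEGImages/000005.jpg'
# xml == 'dataset/voc/VOCdevkit/VOC2007/Annotations/000005.xml'
```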
@@ -44,7 +44,7 @@ def getbbox(self, points):
     return self.mask2box(mask)

-def images(data, num):
+def images_labelme(data, num):
     image = {}
     image['height'] = data['imageHeight']
     image['width'] = data['imageWidth']
@@ -52,6 +52,14 @@ def images(data, num):
     image['file_name'] = data['imagePath'].split('/')[-1]
     return image

+def images_cityscape(data, num, img_file):
+    image = {}
+    image['height'] = data['imgHeight']
+    image['width'] = data['imgWidth']
+    image['id'] = num + 1
+    image['file_name'] = img_file
+    return image

 def categories(label, labels_list):
     category = {}
@@ -112,7 +120,7 @@ def get_bbox(height, width, points):
     ]

-def deal_json(img_path, json_path):
+def deal_json(ds_type, img_path, json_path):
     data_coco = {}
     label_to_num = {}
     images_list = []
@@ -120,15 +128,21 @@ def deal_json(ds_type, img_path, json_path):
     annotations_list = []
     labels_list = []
     image_num = -1
+    object_num = -1
     for img_file in os.listdir(img_path):
         img_label = img_file.split('.')[0]
+        if img_file.split('.')[-1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
+            continue
         label_file = osp.join(json_path, img_label + '.json')
         print('Generating dataset from:', label_file)
         image_num = image_num + 1
         with open(label_file) as f:
             data = json.load(f)
-            images_list.append(images(data, image_num))
-            object_num = -1
+            if ds_type == 'labelme':
+                images_list.append(images_labelme(data, image_num))
+            elif ds_type == 'cityscape':
+                images_list.append(images_cityscape(data, image_num, img_file))
+            if ds_type == 'labelme':
                 for shapes in data['shapes']:
                     object_num = object_num + 1
                     label = shapes['label']
@@ -148,6 +162,18 @@ def deal_json(ds_type, img_path, json_path):
                         points.append([points[1][0], points[0][1]])
                         annotations_list.append(
                             annotations_rectangle(points, label, image_num, object_num, label_to_num))
+            elif ds_type == 'cityscape':
+                for shapes in data['objects']:
+                    object_num = object_num + 1
+                    label = shapes['label']
+                    if label not in labels_list:
+                        categories_list.append(categories(label, labels_list))
+                        labels_list.append(label)
+                        label_to_num[label] = len(labels_list)
+                    points = shapes['polygon']
+                    annotations_list.append(
+                        annotations_polygon(data['imgHeight'], data[
+                            'imgWidth'], points, label, image_num, object_num, label_to_num))
     data_coco['images'] = images_list
     data_coco['categories'] = categories_list
     data_coco['annotations'] = annotations_list
@@ -157,6 +183,7 @@ def deal_json(ds_type, img_path, json_path):
 def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('--dataset_type', help='the type of dataset')
     parser.add_argument('--json_input_dir', help='input annotated directory')
     parser.add_argument('--image_input_dir', help='image directory')
     parser.add_argument(
@@ -177,6 +204,11 @@ def main():
         type=float,
         default=0.0)
     args = parser.parse_args()
+    try:
+        assert args.dataset_type in ['labelme', 'cityscape']
+    except AssertionError as e:
+        print('Now only support the cityscape dataset and labelme dataset!!')
+        os._exit(0)
     try:
         assert os.path.exists(args.json_input_dir)
     except AssertionError as e:
@@ -234,7 +266,8 @@ def main():
     if not os.path.exists(args.output_dir + '/annotations'):
         os.makedirs(args.output_dir + '/annotations')
     if args.train_proportion != 0:
-        train_data_coco = deal_json(args.output_dir + '/train',
+        train_data_coco = deal_json(args.dataset_type,
+                                    args.output_dir + '/train',
                                     args.json_input_dir)
         train_json_path = osp.join(args.output_dir + '/annotations',
                                    'instance_train.json')
......
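`deal_json` assembles a dict in the standard COCO layout before it is written to `instance_train.json`. An illustrative sketch of that structure (the top-level keys come from the diff; the per-entry fields of `categories` and `annotations` are assumptions, not taken from this commit):

```python
# Illustrative only: shape of the dict deal_json returns.
data_coco = {
    'images': [  # built by images_labelme() / images_cityscape()
        {'height': 1024, 'width': 2048, 'id': 1,
         'file_name': 'frankfurt_000000_000294.png'},
    ],
    'categories': [  # one entry per distinct label (fields assumed)
        {'id': 1, 'name': 'car'},
    ],
    'annotations': [  # rectangles or polygons per object (fields assumed)
        {'id': 1, 'image_id': 1, 'category_id': 1,
         'bbox': [10.0, 20.0, 200.0, 100.0]},
    ],
}
```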
@@ -25,7 +25,7 @@ import hashlib
 import tarfile
 import zipfile

-from .voc_utils import merge_and_create_list
+from .voc_utils import create_list

 import logging
 logger = logging.getLogger(__name__)
@@ -59,7 +59,7 @@ DATASETS = {
         (
             'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
             'b6e924de25625d8de591ea690078ad9f', ),
-    ], ["VOCdevkit/VOC_all"]),
+    ], ["VOCdevkit/VOC2012", "VOCdevkit/VOC2007"]),
     'wider_face': ([
         (
             'https://dataset.bj.bcebos.com/wider_face/WIDER_train.zip',
@@ -85,7 +85,8 @@ def get_weights_path(url):
     """Get weights path from WEIGHT_HOME, if not exists,
     download it from url.
     """
-    return get_path(url, WEIGHTS_HOME)
+    path, _ = get_path(url, WEIGHTS_HOME)
+    return path

 def get_dataset_path(path, annotation, image_dir):
@@ -107,19 +108,26 @@ def get_dataset_path(path, annotation, image_dir):
                         "{}".format(path, name))
             data_dir = osp.join(DATASET_HOME, name)

-            # For voc, only check merged dir VOC_all
+            # For voc, only check dir VOCdevkit/VOC2012, VOCdevkit/VOC2007
             if name == 'voc':
-                check_dir = osp.join(data_dir, dataset[1][0])
+                exists = True
+                for sub_dir in dataset[1]:
+                    check_dir = osp.join(data_dir, sub_dir)
                     if osp.exists(check_dir):
                         logger.info("Found {}".format(check_dir))
+                    else:
+                        exists = False
+                if exists:
                     return data_dir

+            # voc existence is checked above; if we reach here, voc is absent
+            check_exist = name != 'voc'
             for url, md5sum in dataset[0]:
-                get_path(url, data_dir, md5sum)
+                get_path(url, data_dir, md5sum, check_exist)

-            # voc should merge dir and create list after download
+            # voc should create list after download
             if name == 'voc':
-                _merge_voc_dir(data_dir, dataset[1][0])
+                create_voc_list(data_dir)

             return data_dir

     # not match any dataset in DATASETS
@@ -129,26 +137,17 @@ def get_dataset_path(path, annotation, image_dir):
                     osp.split(path)[-1]))

-def _merge_voc_dir(data_dir, output_subdir):
-    logger.info("Download voc dataset successed, merge "
-                "VOC2007 and VOC2012 to VOC_all...")
-    output_dir = osp.join(data_dir, output_subdir)
-    devkit_dir = "/".join(output_dir.split('/')[:-1])
+def create_voc_list(data_dir, devkit_subdir='VOCdevkit'):
+    logger.info("Create voc file list...")
+    devkit_dir = osp.join(data_dir, devkit_subdir)
     years = ['2007', '2012']
-    # merge dir in output_tmp_dir at first, move to
-    # output_dir after merge sucessed.
-    output_tmp_dir = osp.join(data_dir, 'tmp')
-    if osp.isdir(output_tmp_dir):
-        shutil.rmtree(output_tmp_dir)

     # NOTE: since using auto download VOC
     # dataset, VOC default label list should be used,
     # do not generate label_list.txt here. For default
     # label, see ../data/source/voc_loader.py
-    merge_and_create_list(devkit_dir, years, output_tmp_dir)
-    shutil.move(output_tmp_dir, output_dir)
-    # remove source directory VOC2007 and VOC2012
-    shutil.rmtree(osp.join(devkit_dir, "VOC2007"))
-    shutil.rmtree(osp.join(devkit_dir, "VOC2012"))
+    create_list(devkit_dir, years, data_dir)
+    logger.info("Create voc file list finished")

 def map_path(url, root_dir):
@@ -161,7 +160,7 @@ def map_path(url, root_dir):
     return osp.join(root_dir, fpath)

-def get_path(url, root_dir, md5sum=None):
+def get_path(url, root_dir, md5sum=None, check_exist=True):
     """ Download from given url to root_dir.
     if file or directory specified by url is exists under
     root_dir, return the path directly, otherwise download
@@ -178,20 +177,25 @@ def get_path(url, root_dir, md5sum=None, check_exist=True):
     # For same zip file, decompressed directory name different
     # from zip file name, rename by following map
     decompress_name_map = {
-        "VOC": "VOCdevkit/VOC_all",
+        "VOCtrainval_11-May-2012": "VOCdevkit/VOC2012",
+        "VOCtrainval_06-Nov-2007": "VOCdevkit/VOC2007",
+        "VOCtest_06-Nov-2007": "VOCdevkit/VOC2007",
         "annotations_trainval": "annotations"
     }
     for k, v in decompress_name_map.items():
         if fullpath.find(k) >= 0:
             fullpath = '/'.join(fullpath.split('/')[:-1] + [v])

-    if osp.exists(fullpath):
+    exist_flag = False
+    if osp.exists(fullpath) and check_exist:
+        exist_flag = True
         logger.info("Found {}".format(fullpath))
     else:
+        exist_flag = False
         fullname = _download(url, root_dir, md5sum)
         _decompress(fullname)
-    return fullpath
+    return fullpath, exist_flag

 def download_dataset(path, dataset=None):
@@ -201,9 +205,7 @@ def download_dataset(path, dataset=None):
         return
     dataset_info = DATASETS[dataset][0]
     for info in dataset_info:
-        get_path(info[0], path, info[1])
-    if dataset == 'voc':
-        _merge_voc_dir(path, DATASETS[dataset][1][0])
+        get_path(info[0], path, info[1], False)
     logger.info("Download dataset {} finished.".format(dataset))
......
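The `check_exist` switch is skipped for VOC presumably because all three VOC archives decompress into the same `VOCdevkit` subtree, so finding `VOCdevkit/VOC2007` after the first tar must not short-circuit the remaining downloads. Since `get_path` now returns a tuple, callers unpack it, as `get_weights_path` does above:

```python
# get_path now returns (path, exist_flag); discard the flag when only
# the path is needed.
path, already_there = get_path(url, root_dir, md5sum, check_exist=True)
if not already_there:
    logger.info("downloaded and decompressed to {}".format(path))
```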
@@ -22,20 +22,15 @@ import re
 import random
 import shutil

-__all__ = ['merge_and_create_list']
+__all__ = ['create_list']

-def merge_and_create_list(devkit_dir, years, output_dir):
+def create_list(devkit_dir, years, output_dir):
     """
-    Merge VOC2007 and VOC2012 to output_dir and create following list:
-    1. train.txt
-    2. val.txt
-    3. test.txt
+    create following list:
+    1. trainval.txt
+    2. test.txt
     """
-    os.makedirs(osp.join(output_dir, 'Annotations/'))
-    os.makedirs(osp.join(output_dir, 'ImageSets/Main/'))
-    os.makedirs(osp.join(output_dir, 'JPEGImages/'))
     trainval_list = []
     test_list = []
     for year in years:
@@ -43,20 +38,16 @@ def create_list(devkit_dir, years, output_dir):
         trainval_list.extend(trainval)
         test_list.extend(test)

-    main_dir = osp.join(output_dir, 'ImageSets/Main/')
     random.shuffle(trainval_list)
-    with open(osp.join(main_dir, 'train.txt'), 'w') as ftrainval:
+    with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
         for item in trainval_list:
-            ftrainval.write(item + '\n')
+            ftrainval.write(item[0] + ' ' + item[1] + '\n')

-    with open(osp.join(main_dir, 'val.txt'), 'w') as fval:
-        with open(osp.join(main_dir, 'test.txt'), 'w') as ftest:
+    with open(osp.join(output_dir, 'test.txt'), 'w') as fval:
         ct = 0
         for item in test_list:
             ct += 1
-            fval.write(item + '\n')
-            if ct <= 1000:
-                ftest.write(item + '\n')
+            fval.write(item[0] + ' ' + item[1] + '\n')

 def _get_voc_dir(devkit_dir, year, type):
@@ -86,14 +77,10 @@ def _walk_voc_dir(devkit_dir, year, output_dir):
             if name_prefix in added:
                 continue
             added.add(name_prefix)
-            ann_path = osp.join(annotation_dir, name_prefix + '.xml')
-            img_path = osp.join(img_dir, name_prefix + '.jpg')
-            new_ann_path = osp.join(output_dir, 'Annotations/',
-                                    name_prefix + '.xml')
-            new_img_path = osp.join(output_dir, 'JPEGImages/',
-                                    name_prefix + '.jpg')
-            shutil.copy(ann_path, new_ann_path)
-            shutil.copy(img_path, new_img_path)
-            img_ann_list.append(name_prefix)
+            ann_path = osp.join(osp.relpath(annotation_dir, output_dir),
+                                name_prefix + '.xml')
+            img_path = osp.join(osp.relpath(img_dir, output_dir),
+                                name_prefix + '.jpg')
+            img_ann_list.append((img_path, ann_path))

     return trainval_list, test_list
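Because `create_list` is called with `output_dir` set to the dataset root (`dataset/voc`), `osp.relpath` makes every entry relative to the list file itself. A quick check of the expected output, assuming the standard devkit layout:

```python
import os.path as osp

annotation_dir = 'dataset/voc/VOCdevkit/VOC2007/Annotations'
output_dir = 'dataset/voc'
ann_path = osp.join(osp.relpath(annotation_dir, output_dir), '000005.xml')
print(ann_path)  # VOCdevkit/VOC2007/Annotations/000005.xml
```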