From 40e33f8ebadaa49977e47b0b30decbfb04f94663 Mon Sep 17 00:00:00 2001
From: Kaipeng Deng <dengkaipeng@baidu.com>
Date: Mon, 28 Oct 2019 18:09:34 +0800
Subject: [PATCH] Change voc loader (#3781)

* change voc_loader
---
 .../configs/ssd/ssd_mobilenet_v1_voc.yml      |  6 +-
 .../configs/ssd/ssd_vgg16_300_voc.yml         |  6 +-
 .../configs/ssd/ssd_vgg16_512_voc.yml         |  6 +-
 .../configs/yolov3_darknet_voc.yml            |  6 +-
 .../configs/yolov3_mobilenet_v1_fruit.yml     |  7 +-
 .../configs/yolov3_mobilenet_v1_voc.yml       |  6 +-
 .../configs/yolov3_r34_voc.yml                |  6 +-
 .../dataset/voc/create_list.py                | 25 +++++++
 .../dataset/voc/label_list.txt                | 20 ++++++
 PaddleCV/PaddleDetection/docs/DATA.md         | 57 +++++++++-------
 PaddleCV/PaddleDetection/docs/DATA_cn.md      | 63 ++++++++++--------
 PaddleCV/PaddleDetection/docs/INSTALL.md      | 63 ++++++++++++++++++
 PaddleCV/PaddleDetection/docs/INSTALL_cn.md   | 62 +++++++++++++++++
 .../PaddleDetection/ppdet/data/data_feed.py   | 17 +++--
 .../ppdet/data/source/roidb_source.py         |  2 +-
 .../ppdet/data/source/voc_loader.py           | 38 ++++-------
 .../PaddleDetection/ppdet/utils/download.py   | 66 ++++++++++---------
 .../PaddleDetection/ppdet/utils/voc_utils.py  | 47 +++++--------
 18 files changed, 325 insertions(+), 178 deletions(-)
 create mode 100644 PaddleCV/PaddleDetection/dataset/voc/create_list.py
 create mode 100644 PaddleCV/PaddleDetection/dataset/voc/label_list.txt

diff --git a/PaddleCV/PaddleDetection/configs/ssd/ssd_mobilenet_v1_voc.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_mobilenet_v1_voc.yml
index 1d855464..a360830f 100644
--- a/PaddleCV/PaddleDetection/configs/ssd/ssd_mobilenet_v1_voc.yml
+++ b/PaddleCV/PaddleDetection/configs/ssd/ssd_mobilenet_v1_voc.yml
@@ -61,8 +61,7 @@ SSDTrainFeed:
   use_process: true
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
     use_default_label: true
 
 SSDEvalFeed:
@@ -70,8 +69,7 @@ SSDEvalFeed:
   use_process: true
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
     use_default_label: true
   drop_last: false
 
diff --git a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300_voc.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300_voc.yml
index 5d91ed14..36c05375 100644
--- a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300_voc.yml
+++ b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_300_voc.yml
@@ -64,8 +64,7 @@ SSDTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
     use_default_label: true
   image_shape: [3, 300, 300]
   sample_transforms:
@@ -109,8 +108,7 @@ SSDEvalFeed:
   batch_size: 32
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
     use_default_label: true
   drop_last: false
   image_shape: [3, 300, 300]
diff --git a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512_voc.yml b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512_voc.yml
index fa274659..b2028e0b 100644
--- a/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512_voc.yml
+++ b/PaddleCV/PaddleDetection/configs/ssd/ssd_vgg16_512_voc.yml
@@ -68,8 +68,7 @@ SSDTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
     use_default_label: true
   image_shape: [3, 512, 512]
   sample_transforms:
@@ -113,8 +112,7 @@ SSDEvalFeed:
   batch_size: 32
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
     use_default_label: true
   drop_last: false
   image_shape: [3, 512, 512]
diff --git a/PaddleCV/PaddleDetection/configs/yolov3_darknet_voc.yml b/PaddleCV/PaddleDetection/configs/yolov3_darknet_voc.yml
index 1f6c10b7..876d380f 100644
--- a/PaddleCV/PaddleDetection/configs/yolov3_darknet_voc.yml
+++ b/PaddleCV/PaddleDetection/configs/yolov3_darknet_voc.yml
@@ -62,8 +62,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
     use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -75,8 +74,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
     use_default_label: true
 
 YoloTestFeed:
diff --git a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_fruit.yml b/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_fruit.yml
index b7922489..bbbcd7fb 100644
--- a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_fruit.yml
+++ b/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_fruit.yml
@@ -64,8 +64,7 @@ YoloTrainFeed:
   batch_size: 1
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/train.txt
-    image_dir: ./JPEGImages
+    annotation: train.txt
     use_default_label: false
   num_workers: 16
   bufsize: 128
@@ -111,8 +110,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/val.txt
-    image_dir: ./JPEGImages
+    annotation: val.txt
     use_default_label: false
  
 
@@ -121,5 +119,4 @@ YoloTestFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/fruit/fruit-detection
-    annotation: ./ImageSets/Main/label_list.txt
     use_default_label: false
diff --git a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_voc.yml b/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_voc.yml
index 8933773e..63e4cbea 100644
--- a/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_voc.yml
+++ b/PaddleCV/PaddleDetection/configs/yolov3_mobilenet_v1_voc.yml
@@ -63,8 +63,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
     use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -76,8 +75,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
     use_default_label: true
 
 YoloTestFeed:
diff --git a/PaddleCV/PaddleDetection/configs/yolov3_r34_voc.yml b/PaddleCV/PaddleDetection/configs/yolov3_r34_voc.yml
index 5bb6a136..aa152e8f 100644
--- a/PaddleCV/PaddleDetection/configs/yolov3_r34_voc.yml
+++ b/PaddleCV/PaddleDetection/configs/yolov3_r34_voc.yml
@@ -65,8 +65,7 @@ YoloTrainFeed:
   batch_size: 8
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: trainval.txt
     use_default_label: true
   num_workers: 8
   bufsize: 128
@@ -78,8 +77,7 @@ YoloEvalFeed:
   image_shape: [3, 608, 608]
   dataset:
     dataset_dir: dataset/voc
-    annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
-    image_dir: VOCdevkit/VOC_all/JPEGImages
+    annotation: test.txt
     use_default_label: true
 
 YoloTestFeed:
diff --git a/PaddleCV/PaddleDetection/dataset/voc/create_list.py b/PaddleCV/PaddleDetection/dataset/voc/create_list.py
new file mode 100644
index 00000000..1a237493
--- /dev/null
+++ b/PaddleCV/PaddleDetection/dataset/voc/create_list.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys
+import os.path as osp
+import logging
+
+from ppdet.utils.download import create_voc_list
+
+logging.basicConfig(level=logging.INFO)
+
+voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
+create_voc_list(voc_path)
diff --git a/PaddleCV/PaddleDetection/dataset/voc/label_list.txt b/PaddleCV/PaddleDetection/dataset/voc/label_list.txt
new file mode 100644
index 00000000..8420ab35
--- /dev/null
+++ b/PaddleCV/PaddleDetection/dataset/voc/label_list.txt
@@ -0,0 +1,20 @@
+aeroplane
+bicycle
+bird
+boat
+bottle
+bus
+car
+cat
+chair
+cow
+diningtable
+dog
+horse
+motorbike
+person
+pottedplant
+sheep
+sofa
+train
+tvmonitor
diff --git a/PaddleCV/PaddleDetection/docs/DATA.md b/PaddleCV/PaddleDetection/docs/DATA.md
index ac2244f4..46640502 100644
--- a/PaddleCV/PaddleDetection/docs/DATA.md
+++ b/PaddleCV/PaddleDetection/docs/DATA.md
@@ -27,6 +27,7 @@ Parses various data sources and creates `data.Dataset` instances. Currently,
 following data sources are supported:
 
 - COCO data source
+
 Loads `COCO` type datasets with directory structures like this:
 
   ```
@@ -36,46 +37,54 @@ Loads `COCO` type datasets with directory structures like this:
   │   ├── instances_train2017.json
   │   ├── instances_val2014.json
   │   ├── instances_val2017.json
-  |   ...
+  │   |   ...
   ├── train2017
   │   ├── 000000000009.jpg
   │   ├── 000000580008.jpg
-  |   ...
+  │   |   ...
   ├── val2017
   │   ├── 000000000139.jpg
   │   ├── 000000000285.jpg
+  │   |   ...
   |   ...
   ```
 
 - Pascal VOC data source
+
 Loads `Pascal VOC` like datasets with directory structure like this:
 
   ```
-  data/pascalvoc/
-  ├──Annotations
-  │   ├── i000050.jpg
-  │   ├── 003876.xml
-  |   ...
-  ├── ImageSets
-  │   ├──Main
-              └── train.txt
-              └── val.txt
-              └── test.txt
-              └── dog_train.txt
-              └── dog_trainval.txt
-              └── dog_val.txt
-              └── dog_test.txt
-              └── ...
-  │   ├──Layout
-               └──...
-  │   ├── Segmentation
-                └──...
-  ├── JPEGImages
-  │   ├── 000050.jpg
-  │   ├── 003876.jpg
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 001789.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 003876.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
   |   ...
   ```
 
+**NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt`
+of the Pascal VOC dataset will be read; otherwise `label_list.txt` is unnecessary and
+the default Pascal VOC label list, which is defined in
+[voc\_loader.py](../ppdet/data/source/voc_loader.py), will be used.
+
 - Roidb data source
 A generalized data source serialized as pickle files, which have the following
 structure:
diff --git a/PaddleCV/PaddleDetection/docs/DATA_cn.md b/PaddleCV/PaddleDetection/docs/DATA_cn.md
index e34ba8e8..332e81b0 100644
--- a/PaddleCV/PaddleDetection/docs/DATA_cn.md
+++ b/PaddleCV/PaddleDetection/docs/DATA_cn.md
@@ -11,9 +11,12 @@
 子功能介绍：
 
 1. 数据解析  
-     数据解析得到的是`data.Dataset`,实现逻辑位于`data.source`中。通过它可以实现解析不同格式的数据集，已支持的数据源包括：
+
+数据解析得到的是`data.Dataset`,实现逻辑位于`data.source`中。通过它可以实现解析不同格式的数据集，已支持的数据源包括：
+
 - COCO数据源
-     该数据集目前分为COCO2012和COCO2017，主要由json文件和image文件组成，其组织结构如下所示：
+
+该数据集目前分为COCO2014和COCO2017，主要由json文件和image文件组成，其组织结构如下所示：
 
   ```
   dataset/coco/
@@ -22,49 +25,53 @@
   │   ├── instances_train2017.json
   │   ├── instances_val2014.json
   │   ├── instances_val2017.json
-  |   ...
+  │   |   ...
   ├── train2017
   │   ├── 000000000009.jpg
   │   ├── 000000580008.jpg
-  |   ...
+  │   |   ...
   ├── val2017
   │   ├── 000000000139.jpg
   │   ├── 000000000285.jpg
+  │   |   ...
   |   ...
   ```
 
 
 - Pascal VOC数据源
-     该数据集目前分为VOC2007和VOC2012，主要由xml文件和image文件组成，其组织结构如下所示：
 
+该数据集目前分为VOC2007和VOC2012，主要由xml文件和image文件组成，其组织结构如下所示：
 
   ```
-  data/pascalvoc/
-  ├──Annotations
-  │   ├── i000050.jpg
-  │   ├── 003876.xml
-  |   ...
-  ├── ImageSets
-  │   ├──Main
-              └── train.txt
-              └── val.txt
-              └── test.txt
-              └── dog_train.txt
-              └── dog_trainval.txt
-              └── dog_val.txt
-              └── dog_test.txt
-              └── ...
-  │   ├──Layout
-               └──...
-  │   ├── Segmentation
-                └──...
-  ├── JPEGImages
-  │   ├── 000050.jpg
-  │   ├── 003876.jpg
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 001789.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 003876.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
   |   ...
   ```
 
-
+**说明：** 如果你在yaml配置文件中设置`use_default_label=False`, 将从`label_list.txt`
+中读取类别列表，反之则可以没有`label_list.txt`文件，检测库会使用Pascal VOC数据集的默
+认类别列表，默认类别列表定义在[voc\_loader.py](../ppdet/data/source/voc_loader.py)中。
 
 - Roidb数据源
     该数据集主要由COCO数据集和Pascal VOC数据集转换而成的pickle文件，包含一个dict，而dict中只包含一个命名为‘records’的list（可能还有一个命名为‘cname2cid’的字典），其内容如下所示：
diff --git a/PaddleCV/PaddleDetection/docs/INSTALL.md b/PaddleCV/PaddleDetection/docs/INSTALL.md
index 0761a240..6a9baf0a 100644
--- a/PaddleCV/PaddleDetection/docs/INSTALL.md
+++ b/PaddleCV/PaddleDetection/docs/INSTALL.md
@@ -111,6 +111,13 @@ ln -sf <path/to/coco> <path/to/paddle_detection>/dataset/coco
 ln -sf <path/to/voc> <path/to/paddle_detection>/dataset/voc
 ```
 
+For the Pascal VOC dataset, create the file lists by running:
+
+```
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/create_list.py
+```
+
 **Download datasets manually:**
 
 On the other hand, to download the datasets, run the following commands:
@@ -122,13 +129,69 @@ export PYTHONPATH=$PYTHONPATH:.
 python dataset/coco/download_coco.py
 ```
 
+The `COCO` dataset has a directory structure like this:
+
+  ```
+  dataset/coco/
+  ├── annotations
+  │   ├── instances_train2014.json
+  │   ├── instances_train2017.json
+  │   ├── instances_val2014.json
+  │   ├── instances_val2017.json
+  │   |   ...
+  ├── train2017
+  │   ├── 000000000009.jpg
+  │   ├── 000000580008.jpg
+  │   |   ...
+  ├── val2017
+  │   ├── 000000000139.jpg
+  │   ├── 000000000285.jpg
+  │   |   ...
+  |   ...
+  ```
+
 - Pascal VOC
 
 ```
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/voc/download_voc.py
+python dataset/voc/create_list.py
 ```
 
+The `Pascal VOC` dataset has a directory structure like this:
+
+  ```
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 001789.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 003876.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  |   ...
+  ```
+
+**NOTE:** If you set `use_default_label=False` in yaml configs, the `label_list.txt`
+of the Pascal VOC dataset will be read; otherwise `label_list.txt` is unnecessary and
+the default Pascal VOC label list, which is defined in
+[voc\_loader.py](../ppdet/data/source/voc_loader.py), will be used.
+
 **Download datasets automatically:**
 
 If a training session is started but the dataset is not setup properly (e.g,
diff --git a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md b/PaddleCV/PaddleDetection/docs/INSTALL_cn.md
index eca4d41b..f8eee189 100644
--- a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md
+++ b/PaddleCV/PaddleDetection/docs/INSTALL_cn.md
@@ -108,6 +108,13 @@ ln -sf <path/to/coco> <path/to/paddle_detection>/dataset/coco
 ln -sf <path/to/voc> <path/to/paddle_detection>/dataset/voc
 ```
 
+对于Pascal VOC数据集，需通过如下命令创建文件列表：
+
+```
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/create_list.py
+```
+
 **手动下载数据集：**
 
 若您本地没有数据集，可通过如下命令下载：
@@ -119,13 +126,68 @@ export PYTHONPATH=$PYTHONPATH:.
 python dataset/coco/download_coco.py
 ```
 
+`COCO` 数据集目录结构如下：
+
+  ```
+  dataset/coco/
+  ├── annotations
+  │   ├── instances_train2014.json
+  │   ├── instances_train2017.json
+  │   ├── instances_val2014.json
+  │   ├── instances_val2017.json
+  │   |   ...
+  ├── train2017
+  │   ├── 000000000009.jpg
+  │   ├── 000000580008.jpg
+  │   |   ...
+  ├── val2017
+  │   ├── 000000000139.jpg
+  │   ├── 000000000285.jpg
+  │   |   ...
+  |   ...
+  ```
+
 - Pascal VOC
 
 ```
 export PYTHONPATH=$PYTHONPATH:.
 python dataset/voc/download_voc.py
+python dataset/voc/create_list.py
 ```
 
+`Pascal VOC` 数据集目录结构如下：
+
+  ```
+  dataset/voc/
+  ├── train.txt
+  ├── val.txt
+  ├── test.txt
+  ├── label_list.txt (optional)
+  ├── VOCdevkit/VOC2007
+  │   ├── Annotations
+  │       ├── 001789.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 001789.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  ├── VOCdevkit/VOC2012
+  │   ├── Annotations
+  │       ├── 003876.xml
+  │       |   ...
+  │   ├── JPEGImages 
+  │       ├── 003876.jpg
+  │       |   ...
+  │   ├── ImageSets
+  │       |   ...
+  |   ...
+  ```
+
+**说明：** 如果你在yaml配置文件中设置`use_default_label=False`, 将从`label_list.txt`
+中读取类别列表，反之则可以没有`label_list.txt`文件，检测库会使用Pascal VOC数据集的默
+认类别列表，默认类别列表定义在[voc\_loader.py](../ppdet/data/source/voc_loader.py)中。
+
 **自动下载数据集：**
 
 若您在数据集未成功设置（例如，在`dataset/coco`或`dataset/voc`中找不到）的情况下开始运行，
diff --git a/PaddleCV/PaddleDetection/ppdet/data/data_feed.py b/PaddleCV/PaddleDetection/ppdet/data/data_feed.py
index b70f4be2..c384b2cb 100644
--- a/PaddleCV/PaddleDetection/ppdet/data/data_feed.py
+++ b/PaddleCV/PaddleDetection/ppdet/data/data_feed.py
@@ -219,7 +219,7 @@ class DataSet(object):
 
     def __init__(self,
                  annotation,
-                 image_dir,
+                 image_dir=None,
                  dataset_dir=None,
                  use_default_label=None):
         super(DataSet, self).__init__()
@@ -229,7 +229,7 @@ class DataSet(object):
         self.use_default_label = use_default_label
 
 
-COCO_DATASET_DIR = 'coco'
+COCO_DATASET_DIR = 'dataset/coco'
 COCO_TRAIN_ANNOTATION = 'annotations/instances_train2017.json'
 COCO_TRAIN_IMAGE_DIR = 'train2017'
 COCO_VAL_ANNOTATION = 'annotations/instances_val2017.json'
@@ -246,12 +246,11 @@ class CocoDataSet(DataSet):
             dataset_dir=dataset_dir, annotation=annotation, image_dir=image_dir)
 
 
-VOC_DATASET_DIR = 'pascalvoc'
-VOC_TRAIN_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/train.txt'
-VOC_VAL_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/val.txt'
-VOC_TEST_ANNOTATION = 'VOCdevkit/VOC_all/ImageSets/Main/test.txt'
-VOC_IMAGE_DIR = 'VOCdevkit/VOC_all/JPEGImages'
-VOC_USE_DEFAULT_LABEL = None
+VOC_DATASET_DIR = 'dataset/voc'
+VOC_TRAIN_ANNOTATION = 'train.txt'
+VOC_VAL_ANNOTATION = 'val.txt'
+VOC_IMAGE_DIR = None
+VOC_USE_DEFAULT_LABEL = True
 
 
 @serializable
@@ -843,7 +842,7 @@ class SSDTestFeed(DataFeed):
     __doc__ = DataFeed.__doc__
 
     def __init__(self,
-                 dataset=SimpleDataSet(VOC_TEST_ANNOTATION).__dict__,
+                 dataset=SimpleDataSet(VOC_VAL_ANNOTATION).__dict__,
                  fields=['image', 'im_id', 'im_shape'],
                  image_shape=[3, 300, 300],
                  sample_transforms=[
diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/roidb_source.py b/PaddleCV/PaddleDetection/ppdet/data/source/roidb_source.py
index 5eeb08c1..7d42e87b 100644
--- a/PaddleCV/PaddleDetection/ppdet/data/source/roidb_source.py
+++ b/PaddleCV/PaddleDetection/ppdet/data/source/roidb_source.py
@@ -62,7 +62,7 @@ class RoiDbSource(Dataset):
         assert os.path.isfile(anno_file) or os.path.isdir(anno_file), \
                 'anno_file {} is not a file or a directory'.format(anno_file)
         self._fname = anno_file
-        self._image_dir = image_dir
+        self._image_dir = image_dir if image_dir is not None else ''
         if image_dir is not None:
             assert os.path.isdir(image_dir), \
                     'image_dir {} is not a directory'.format(image_dir)
diff --git a/PaddleCV/PaddleDetection/ppdet/data/source/voc_loader.py b/PaddleCV/PaddleDetection/ppdet/data/source/voc_loader.py
index 4b25eecb..8fc1b795 100644
--- a/PaddleCV/PaddleDetection/ppdet/data/source/voc_loader.py
+++ b/PaddleCV/PaddleDetection/ppdet/data/source/voc_loader.py
@@ -26,8 +26,7 @@ def get_roidb(anno_path,
     Load VOC records with annotations in xml directory 'anno_path'
 
     Notes:
-    ${anno_path}/ImageSets/Main/train.txt must contains xml file names for annotations
-    ${anno_path}/Annotations/xxx.xml must contain annotation info for one record
+    each line of ${anno_path} must contain an image file path and an xml file path
 
     Args:
         anno_path (str): root directory for voc annotation data
@@ -53,11 +52,7 @@ def get_roidb(anno_path,
         'cname2id' is a dict to map category name to class id
     """
 
-    txt_file = anno_path
-    part = txt_file.split('ImageSets')
-    xml_path = os.path.join(part[0], 'Annotations')
-    assert os.path.isfile(txt_file) and \
-        os.path.isdir(xml_path), 'invalid xml path'
+    data_dir = os.path.dirname(anno_path)
 
     records = []
     ct = 0
@@ -67,17 +62,16 @@ def get_roidb(anno_path,
 
     # mapping category name to class id
     # background:0, first_class:1, second_class:2, ...
-    with open(txt_file, 'r') as fr:
+    with open(anno_path, 'r') as fr:
         while True:
             line = fr.readline()
             if not line:
                 break
-            fname = line.strip() + '.xml'
-            xml_file = os.path.join(xml_path, fname)
+            img_file, xml_file = [os.path.join(data_dir, x) \
+                    for x in line.strip().split()[:2]]
             if not os.path.isfile(xml_file):
                 continue
             tree = ET.parse(xml_file)
-            im_fname = tree.find('filename').text
             if tree.find('id') is None:
                 im_id = np.array([ct])
             else:
@@ -114,7 +108,7 @@ def get_roidb(anno_path,
                 is_crowd[i][0] = 0
                 difficult[i][0] = _difficult
             voc_rec = {
-                'im_file': im_fname,
+                'im_file': img_file,
                 'im_id': im_id,
                 'h': im_h,
                 'w': im_w,
@@ -144,8 +138,7 @@ def load(anno_path,
     xml directory 'anno_path'
 
     Notes:
-    ${anno_path}/ImageSets/Main/train.txt must contains xml file names for annotations
-    ${anno_path}/Annotations/xxx.xml must contain annotation info for one record
+    each line of ${anno_path} must contain an image file path and an xml file path
 
     Args:
         @anno_path (str): root directory for voc annotation data
@@ -171,11 +164,7 @@ def load(anno_path,
         'cname2id' is a dict to map category name to class id
     """
 
-    txt_file = anno_path
-    part = txt_file.split('ImageSets')
-    xml_path = os.path.join(part[0], 'Annotations')
-    assert os.path.isfile(txt_file) and \
-        os.path.isdir(xml_path), 'invalid xml path'
+    data_dir = os.path.dirname(anno_path)
 
     # mapping category name to class id
     # if with_background is True:
@@ -186,7 +175,7 @@ def load(anno_path,
     ct = 0
     cname2cid = {}
     if not use_default_label:
-        label_path = os.path.join(part[0], 'ImageSets/Main/label_list.txt')
+        label_path = os.path.join(data_dir, 'label_list.txt')
         with open(label_path, 'r') as fr:
             label_id = int(with_background)
             for line in fr.readlines():
@@ -195,17 +184,16 @@ def load(anno_path,
     else:
         cname2cid = pascalvoc_label(with_background)
 
-    with open(txt_file, 'r') as fr:
+    with open(anno_path, 'r') as fr:
         while True:
             line = fr.readline()
             if not line:
                 break
-            fname = line.strip() + '.xml'
-            xml_file = os.path.join(xml_path, fname)
+            img_file, xml_file = [os.path.join(data_dir, x) \
+                    for x in line.strip().split()[:2]]
             if not os.path.isfile(xml_file):
                 continue
             tree = ET.parse(xml_file)
-            im_fname = tree.find('filename').text
             if tree.find('id') is None:
                 im_id = np.array([ct])
             else:
@@ -235,7 +223,7 @@ def load(anno_path,
                 is_crowd[i][0] = 0
                 difficult[i][0] = _difficult
             voc_rec = {
-                'im_file': im_fname,
+                'im_file': img_file,
                 'im_id': im_id,
                 'h': im_h,
                 'w': im_w,
diff --git a/PaddleCV/PaddleDetection/ppdet/utils/download.py b/PaddleCV/PaddleDetection/ppdet/utils/download.py
index 473cf5ff..6de9138e 100644
--- a/PaddleCV/PaddleDetection/ppdet/utils/download.py
+++ b/PaddleCV/PaddleDetection/ppdet/utils/download.py
@@ -25,7 +25,7 @@ import hashlib
 import tarfile
 import zipfile
 
-from .voc_utils import merge_and_create_list
+from .voc_utils import create_list
 
 import logging
 logger = logging.getLogger(__name__)
@@ -59,7 +59,7 @@ DATASETS = {
         (
             'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
             'b6e924de25625d8de591ea690078ad9f', ),
-    ], ["VOCdevkit/VOC_all"]),
+    ], ["VOCdevkit/VOC2012", "VOCdevkit/VOC2007"]),
     'wider_face': ([
         (
             'https://dataset.bj.bcebos.com/wider_face/WIDER_train.zip',
@@ -85,7 +85,8 @@ def get_weights_path(url):
     """Get weights path from WEIGHT_HOME, if not exists,
     download it from url.
     """
-    return get_path(url, WEIGHTS_HOME)
+    path, _ = get_path(url, WEIGHTS_HOME)
+    return path
 
 
 def get_dataset_path(path, annotation, image_dir):
@@ -107,19 +108,26 @@ def get_dataset_path(path, annotation, image_dir):
                         "{}".format(path, name))
             data_dir = osp.join(DATASET_HOME, name)
 
-            # For voc, only check merged dir VOC_all
+            # For voc, only check dir VOCdevkit/VOC2012, VOCdevkit/VOC2007
             if name == 'voc':
-                check_dir = osp.join(data_dir, dataset[1][0])
-                if osp.exists(check_dir):
-                    logger.info("Found {}".format(check_dir))
+                exists = True
+                for sub_dir in dataset[1]:
+                    check_dir = osp.join(data_dir, sub_dir)
+                    if osp.exists(check_dir):
+                        logger.info("Found {}".format(check_dir))
+                    else:
+                        exists = False
+                if exists:
                     return data_dir
 
+            # voc existence was checked above; reaching here means voc is missing
+            check_exist = name != 'voc'
             for url, md5sum in dataset[0]:
-                get_path(url, data_dir, md5sum)
+                get_path(url, data_dir, md5sum, check_exist)
 
-            # voc should merge dir and create list after download
+            # voc should create list after download
             if name == 'voc':
-                _merge_voc_dir(data_dir, dataset[1][0])
+                create_voc_list(data_dir)
             return data_dir
 
     # not match any dataset in DATASETS
@@ -129,26 +137,17 @@ def get_dataset_path(path, annotation, image_dir):
                                                          osp.split(path)[-1]))
 
 
-def _merge_voc_dir(data_dir, output_subdir):
-    logger.info("Download voc dataset successed, merge "
-                "VOC2007 and VOC2012 to VOC_all...")
-    output_dir = osp.join(data_dir, output_subdir)
-    devkit_dir = "/".join(output_dir.split('/')[:-1])
+def create_voc_list(data_dir, devkit_subdir='VOCdevkit'):
+    logger.info("Create voc file list...")
+    devkit_dir = osp.join(data_dir, devkit_subdir)
     years = ['2007', '2012']
-    # merge dir in output_tmp_dir at first, move to 
-    # output_dir after merge sucessed.
-    output_tmp_dir = osp.join(data_dir, 'tmp')
-    if osp.isdir(output_tmp_dir):
-        shutil.rmtree(output_tmp_dir)
+
     # NOTE: since using auto download VOC
     # dataset, VOC default label list should be used, 
     # do not generate label_list.txt here. For default
     # label, see ../data/source/voc_loader.py
-    merge_and_create_list(devkit_dir, years, output_tmp_dir)
-    shutil.move(output_tmp_dir, output_dir)
-    # remove source directory VOC2007 and VOC2012
-    shutil.rmtree(osp.join(devkit_dir, "VOC2007"))
-    shutil.rmtree(osp.join(devkit_dir, "VOC2012"))
+    create_list(devkit_dir, years, data_dir)
+    logger.info("Create voc file list finished")
 
 
 def map_path(url, root_dir):
@@ -161,7 +160,7 @@ def map_path(url, root_dir):
     return osp.join(root_dir, fpath)
 
 
-def get_path(url, root_dir, md5sum=None):
+def get_path(url, root_dir, md5sum=None, check_exist=True):
     """ Download from given url to root_dir.
     if file or directory specified by url is exists under
     root_dir, return the path directly, otherwise download
@@ -178,20 +177,25 @@ def get_path(url, root_dir, md5sum=None):
     # For same zip file, decompressed directory name different
     # from zip file name, rename by following map
     decompress_name_map = {
-        "VOC": "VOCdevkit/VOC_all",
+        "VOCtrainval_11-May-2012": "VOCdevkit/VOC2012",
+        "VOCtrainval_06-Nov-2007": "VOCdevkit/VOC2007",
+        "VOCtest_06-Nov-2007": "VOCdevkit/VOC2007",
         "annotations_trainval": "annotations"
     }
     for k, v in decompress_name_map.items():
         if fullpath.find(k) >= 0:
             fullpath = '/'.join(fullpath.split('/')[:-1] + [v])
 
-    if osp.exists(fullpath):
+    exist_flag = False
+    if osp.exists(fullpath) and check_exist:
+        exist_flag = True
         logger.info("Found {}".format(fullpath))
     else:
+        exist_flag = False
         fullname = _download(url, root_dir, md5sum)
         _decompress(fullname)
 
-    return fullpath
+    return fullpath, exist_flag
 
 
 def download_dataset(path, dataset=None):
@@ -201,9 +205,7 @@ def download_dataset(path, dataset=None):
         return
     dataset_info = DATASETS[dataset][0]
     for info in dataset_info:
-        get_path(info[0], path, info[1])
-    if dataset == 'voc':
-        _merge_voc_dir(path, DATASETS[dataset][1][0])
+        get_path(info[0], path, info[1], False)
     logger.info("Download dataset {} finished.".format(dataset))
 
 
diff --git a/PaddleCV/PaddleDetection/ppdet/utils/voc_utils.py b/PaddleCV/PaddleDetection/ppdet/utils/voc_utils.py
index 2d7fc4a3..88252e23 100644
--- a/PaddleCV/PaddleDetection/ppdet/utils/voc_utils.py
+++ b/PaddleCV/PaddleDetection/ppdet/utils/voc_utils.py
@@ -22,20 +22,15 @@ import re
 import random
 import shutil
 
-__all__ = ['merge_and_create_list']
+__all__ = ['create_list']
 
 
-def merge_and_create_list(devkit_dir, years, output_dir):
+def create_list(devkit_dir, years, output_dir):
     """
-    Merge VOC2007 and VOC2012 to output_dir and create following list:
-        1. train.txt
-        2. val.txt
-        3. test.txt
+    Create the following lists in output_dir:
+        1. trainval.txt
+        2. test.txt
     """
-    os.makedirs(osp.join(output_dir, 'Annotations/'))
-    os.makedirs(osp.join(output_dir, 'ImageSets/Main/'))
-    os.makedirs(osp.join(output_dir, 'JPEGImages/'))
-
     trainval_list = []
     test_list = []
     for year in years:
@@ -43,20 +38,16 @@ def merge_and_create_list(devkit_dir, years, output_dir):
         trainval_list.extend(trainval)
         test_list.extend(test)
 
-    main_dir = osp.join(output_dir, 'ImageSets/Main/')
     random.shuffle(trainval_list)
-    with open(osp.join(main_dir, 'train.txt'), 'w') as ftrainval:
+    with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
         for item in trainval_list:
-            ftrainval.write(item + '\n')
+            ftrainval.write(item[0] + ' ' + item[1] + '\n')
 
-    with open(osp.join(main_dir, 'val.txt'), 'w') as fval:
-        with open(osp.join(main_dir, 'test.txt'), 'w') as ftest:
-            ct = 0
-            for item in test_list:
-                ct += 1
-                fval.write(item + '\n')
-                if ct <= 1000:
-                    ftest.write(item + '\n')
+    with open(osp.join(output_dir, 'test.txt'), 'w') as fval:
+        ct = 0
+        for item in test_list:
+            ct += 1
+            fval.write(item[0] + ' ' + item[1] + '\n')
 
 
 def _get_voc_dir(devkit_dir, year, type):
@@ -86,14 +77,10 @@ def _walk_voc_dir(devkit_dir, year, output_dir):
                 if name_prefix in added:
                     continue
                 added.add(name_prefix)
-                ann_path = osp.join(annotation_dir, name_prefix + '.xml')
-                img_path = osp.join(img_dir, name_prefix + '.jpg')
-                new_ann_path = osp.join(output_dir, 'Annotations/',
-                                        name_prefix + '.xml')
-                new_img_path = osp.join(output_dir, 'JPEGImages/',
-                                        name_prefix + '.jpg')
-                shutil.copy(ann_path, new_ann_path)
-                shutil.copy(img_path, new_img_path)
-                img_ann_list.append(name_prefix)
+                ann_path = osp.join(osp.relpath(annotation_dir, output_dir),
+                                    name_prefix + '.xml')
+                img_path = osp.join(osp.relpath(img_dir, output_dir),
+                                    name_prefix + '.jpg')
+                img_ann_list.append((img_path, ann_path))
 
     return trainval_list, test_list
-- 
GitLab