未验证 提交 e5e91bc6 编写于 作者: K Kaipeng Deng 提交者: GitHub

Add voc merge_and_create_list (#2572)

* download 3 VOC dataset

* clean code

* fix name map

* refine log

* refine dataset exists check

* add merge_and_create_list

* remove create_list

* clean code

* refine TODO

* not generate label list

* refine comment

* remove useless import
上级 7e0072ab
...@@ -26,6 +26,8 @@ import hashlib ...@@ -26,6 +26,8 @@ import hashlib
import tarfile import tarfile
import zipfile import zipfile
from .voc_utils import merge_and_create_list
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -85,7 +87,7 @@ def get_dataset_path(path): ...@@ -85,7 +87,7 @@ def get_dataset_path(path):
"{}".format(path, name)) "{}".format(path, name))
data_dir = osp.join(DATASET_HOME, name) data_dir = osp.join(DATASET_HOME, name)
# For voc, only check merged dir # For voc, only check merged dir VOC_all
if name == 'voc': if name == 'voc':
check_dir = osp.join(data_dir, dataset[1][0]) check_dir = osp.join(data_dir, dataset[1][0])
if osp.exists(check_dir): if osp.exists(check_dir):
...@@ -95,10 +97,28 @@ def get_dataset_path(path): ...@@ -95,10 +97,28 @@ def get_dataset_path(path):
for url, md5sum in dataset[0]: for url, md5sum in dataset[0]:
get_path(url, data_dir, md5sum) get_path(url, data_dir, md5sum)
# voc should merge dir and create list after download
if name == 'voc': if name == 'voc':
logger.info("Download voc dataset successed, merge " logger.info("Download voc dataset successed, merge "
"VOC2007 and VOC2012 to VOC_all...") "VOC2007 and VOC2012 to VOC_all...")
# TODO(dengkaipeng): merge voc output_dir = osp.join(data_dir, dataset[1][0])
devkit_dir = "/".join(output_dir.split('/')[:-1])
years = ['2007', '2012']
# merge into output_tmp_dir first, then move it to
# output_dir after the merge has succeeded.
output_tmp_dir = osp.join(data_dir, 'tmp')
if osp.isdir(output_tmp_dir):
shutil.rmtree(output_tmp_dir)
# NOTE(dengkaipeng): since using auto download VOC
# dataset, VOC default label list should be used,
# do not generate label_list.txt here. For default
# label, see ../data/source/voc_loader.py
merge_and_create_list(devkit_dir, years,
output_tmp_dir)
shutil.move(output_tmp_dir, output_dir)
# remove source directory VOC2007 and VOC2012
shutil.rmtree(osp.join(devkit_dir, "VOC2007"))
shutil.rmtree(osp.join(devkit_dir, "VOC2012"))
return data_dir return data_dir
# not match any dataset in DATASETS # not match any dataset in DATASETS
...@@ -230,7 +250,7 @@ def _decompress(fname): ...@@ -230,7 +250,7 @@ def _decompress(fname):
# For protecting decompressing interupted, # For protecting decompressing interupted,
# decompress to fpath_tmp directory firstly, if decompress # decompress to fpath_tmp directory firstly, if decompress
# successed, move decompress files to fpath and delete # successed, move decompress files to fpath and delete
# fpath_tmp and download file. # fpath_tmp and remove download compress file.
fpath = '/'.join(fname.split('/')[:-1]) fpath = '/'.join(fname.split('/')[:-1])
fpath_tmp = osp.join(fpath, 'tmp') fpath_tmp = osp.join(fpath, 'tmp')
if osp.isdir(fpath_tmp): if osp.isdir(fpath_tmp):
......
...@@ -12,24 +12,61 @@ ...@@ -12,24 +12,61 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os import os
import os.path as osp import os.path as osp
import re import re
import random import random
import shutil import shutil
devkit_dir = './VOCdevkit' __all__ = ['merge_and_create_list']
years = ['2007', '2012']
def merge_and_create_list(devkit_dir, years, output_dir):
    """
    Merge the VOC datasets of the given years into output_dir and create
    the following lists under output_dir/ImageSets/Main/:
        1. train.txt: trainval entries of all years, shuffled
        2. val.txt: test entries of all years
        3. test.txt: the first 1000 test entries

    Args:
        devkit_dir (str): directory holding the 'VOC' + year directories.
        years (list): year strings to merge, e.g. ['2007', '2012'].
        output_dir (str): directory that receives the merged dataset.
    """
    main_dir = osp.join(output_dir, 'ImageSets/Main/')
    for sub_dir in (osp.join(output_dir, 'Annotations/'), main_dir,
                    osp.join(output_dir, 'JPEGImages/')):
        # os.makedirs raises when the directory already exists, so only
        # create what is missing; a re-run on the same output_dir then
        # no longer fails.
        if not osp.isdir(sub_dir):
            os.makedirs(sub_dir)

    trainval_list = []
    test_list = []
    for year in years:
        # _walk_voc_dir returns the (trainval, test) entries of one year
        trainval, test = _walk_voc_dir(devkit_dir, year, output_dir)
        trainval_list.extend(trainval)
        test_list.extend(test)

    # train.txt holds the trainval entries in random order
    random.shuffle(trainval_list)
    with open(osp.join(main_dir, 'train.txt'), 'w') as ftrainval:
        for item in trainval_list:
            ftrainval.write(item + '\n')

    # val.txt holds every test entry, test.txt only the first 1000
    with open(osp.join(main_dir, 'val.txt'), 'w') as fval:
        with open(osp.join(main_dir, 'test.txt'), 'w') as ftest:
            for idx, item in enumerate(test_list):
                fval.write(item + '\n')
                if idx < 1000:
                    ftest.write(item + '\n')
def get_dir(devkit_dir, year, type):
def _get_voc_dir(devkit_dir, year, type):
return osp.join(devkit_dir, 'VOC' + year, type) return osp.join(devkit_dir, 'VOC' + year, type)
def walk_dir(devkit_dir, year): def _walk_voc_dir(devkit_dir, year, output_dir):
filelist_dir = get_dir(devkit_dir, year, 'ImageSets/Main') filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main')
annotation_dir = get_dir(devkit_dir, year, 'Annotations') annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations')
img_dir = get_dir(devkit_dir, year, 'JPEGImages') img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages')
trainval_list = [] trainval_list = []
test_list = [] test_list = []
added = set() added = set()
...@@ -51,36 +88,11 @@ def walk_dir(devkit_dir, year): ...@@ -51,36 +88,11 @@ def walk_dir(devkit_dir, year):
added.add(name_prefix) added.add(name_prefix)
ann_path = osp.join(annotation_dir, name_prefix + '.xml') ann_path = osp.join(annotation_dir, name_prefix + '.xml')
img_path = osp.join(img_dir, name_prefix + '.jpg') img_path = osp.join(img_dir, name_prefix + '.jpg')
new_ann_path = osp.join('./VOCdevkit/VOC_all/Annotations/', name_prefix + '.xml') new_ann_path = osp.join(output_dir, 'Annotations/', name_prefix + '.xml')
new_img_path = osp.join('./VOCdevkit/VOC_all/JPEGImages/', name_prefix + '.jpg') new_img_path = osp.join(output_dir, 'JPEGImages/', name_prefix + '.jpg')
shutil.copy(ann_path, new_ann_path) shutil.copy(ann_path, new_ann_path)
shutil.copy(img_path, new_img_path) shutil.copy(img_path, new_img_path)
img_ann_list.append(name_prefix) img_ann_list.append(name_prefix)
return trainval_list, test_list return trainval_list, test_list
def prepare_filelist(devkit_dir, years, output_dir):
    """
    Collect the VOC entries of the given years and write train.txt,
    val.txt and test.txt into output_dir.

    The merged dataset directories are created at the fixed location
    ./VOCdevkit/VOC_all/ before the years are walked. train.txt receives
    the shuffled trainval entries, val.txt every test entry and test.txt
    the first 1000 test entries.
    """
    for merged_subdir in ('./VOCdevkit/VOC_all/Annotations/',
                          './VOCdevkit/VOC_all/ImageSets/Main/',
                          './VOCdevkit/VOC_all/JPEGImages/'):
        os.makedirs(merged_subdir)

    all_trainval = []
    all_test = []
    for voc_year in years:
        year_trainval, year_test = walk_dir(devkit_dir, voc_year)
        all_trainval += year_trainval
        all_test += year_test

    random.shuffle(all_trainval)
    train_path = osp.join(output_dir, 'train.txt')
    with open(train_path, 'w') as train_file:
        train_file.writelines(entry + '\n' for entry in all_trainval)

    val_path = osp.join(output_dir, 'val.txt')
    test_path = osp.join(output_dir, 'test.txt')
    with open(val_path, 'w') as val_file:
        with open(test_path, 'w') as test_file:
            # position is 1-based, so the first 1000 entries go to test.txt
            for position, entry in enumerate(all_test, 1):
                val_file.write(entry + '\n')
                if position <= 1000:
                    test_file.write(entry + '\n')
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册