提交 f292d930 编写于 作者: W wuyefeilin 提交者: wuzewu

add PASCAL VOC 2012 download and convert. add README.md (#47)

* add PASCAL VOC2012 download and convert scripts
上级 af0738b3
# 数据下载
## PASCAL VOC 2012数据集
下载 PASCAL VOC 2012数据集并将分割部分的假彩色标注图(`SegmentationClass`文件夹)转换成灰度图并存储在`SegmentationClassAug`文件夹, 并在文件夹`ImageSets/Segmentation`下重新生成列表文件`train.list、val.list和trainval.list。
```shell
# 下载数据集并进行解压转换
python download_and_convert_voc2012.py
```
如果已经下载好PASCAL VOC 2012数据集,将数据集移至dataset目录后使用下述命令直接进行转换即可。
```shell
# 数据集转换
python convert_voc2012.py
```
## Oxford-IIIT Pet数据集
我们使用了Oxford-IIIT中的猫和狗两个类别数据制作了一个小数据集mini_pet,更多关于数据集的介绍请参考[Oxford-IIIT Pet](https://www.robots.ox.ac.uk/~vgg/data/pets/)。
```shell
# 下载数据集并进行解压
python dataset/download_pet.py
```
## Cityscapes数据集
运行下述命令下载并解压Cityscapes数据集。
```shell
# 下载数据集并进行解压
python dataset/download_cityscapes.py
```
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import numpy as np
import os
from PIL import Image
import glob
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
def remove_colormap(filename):
gray_anno = np.array(Image.open(filename))
return gray_anno
def save_annotation(annotation, filename):
annotation = annotation.astype(dtype=np.uint8)
annotation = Image.fromarray(annotation)
annotation.save(filename)
def convert_list(origin_file, seg_file, output_folder):
with open(seg_file, 'w') as fid_seg:
with open(origin_file) as fid_ori:
lines = fid_ori.readlines()
for line in lines:
line = line.strip()
line = '.'.join([line, 'jpg'])
img_name = os.path.join("JPEGImages", line)
line = line.replace('jpg', 'png')
anno_name = os.path.join(output_folder.split(os.sep)[-1], line)
new_line = ' '.join([img_name, anno_name])
fid_seg.write(new_line + "\n")
if __name__ == "__main__":
pascal_root = "./VOCtrainval_11-May-2012/VOC2012"
pascal_root = os.path.join(LOCAL_PATH, pascal_root)
seg_folder = os.path.join(pascal_root, "SegmentationClass")
txt_folder = os.path.join(pascal_root, "ImageSets/Segmentation")
train_path = os.path.join(txt_folder, "train.txt")
val_path = os.path.join(txt_folder, "val.txt")
trainval_path = os.path.join(txt_folder, "trainval.txt")
# 标注图转换后存储目录
output_folder = os.path.join(pascal_root, "SegmentationClassAug")
print("annotation convert and file list convert")
if not os.path.exists(os.path.join(LOCAL_PATH, output_folder)):
os.mkdir(os.path.join(LOCAL_PATH, output_folder))
annotation_names = glob.glob(os.path.join(seg_folder, '*.png'))
for annotation_name in annotation_names:
annotation = remove_colormap(annotation_name)
filename = os.path.basename(annotation_name)
save_name = os.path.join(output_folder, filename)
save_annotation(annotation, save_name)
convert_list(train_path, train_path.replace('txt', 'list'), output_folder)
convert_list(val_path, val_path.replace('txt', 'list'), output_folder)
convert_list(trainval_path, trainval_path.replace('txt', 'list'), output_folder)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import numpy as np
import os
import glob
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TEST_PATH = os.path.join(LOCAL_PATH, "..", "test")
sys.path.append(TEST_PATH)
from test_utils import download_file_and_uncompress
from convert_voc2012 import convert_list
from convert_voc2012 import remove_colormap
from convert_voc2012 import save_annotation
def download_pet_dataset(savepath, extrapath):
url = "https://paddleseg.bj.bcebos.com/dataset/VOCtrainval_11-May-2012.tar"
download_file_and_uncompress(
url=url, savepath=savepath, extrapath=extrapath)
if __name__ == "__main__":
download_pet_dataset(LOCAL_PATH, LOCAL_PATH)
print("Dataset download finish!")
pascal_root = "./VOCtrainval_11-May-2012/VOC2012"
pascal_root = os.path.join(LOCAL_PATH, pascal_root)
seg_folder = os.path.join(pascal_root, "SegmentationClass")
txt_folder = os.path.join(pascal_root, "ImageSets/Segmentation")
train_path = os.path.join(txt_folder, "train.txt")
val_path = os.path.join(txt_folder, "val.txt")
trainval_path = os.path.join(txt_folder, "trainval.txt")
# 标注图转换后存储目录
output_folder = os.path.join(pascal_root, "SegmentationClassAug")
print("annotation convert and file list convert")
if not os.path.exists(output_folder):
os.mkdir(output_folder)
annotation_names = glob.glob(os.path.join(seg_folder, '*.png'))
for annotation_name in annotation_names:
annotation = remove_colormap(annotation_name)
filename = os.path.basename(annotation_name)
save_name = os.path.join(output_folder, filename)
save_annotation(annotation, save_name)
convert_list(train_path, train_path.replace('txt', 'list'), output_folder)
convert_list(val_path, val_path.replace('txt', 'list'), output_folder)
convert_list(trainval_path, trainval_path.replace('txt', 'list'), output_folder)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册