#!/usr/bin/python
# -*- coding: UTF-8 -*-
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generate ``<split>.list`` files for a Cityscapes-style dataset.

Usage::

    python pdseg/tools/create_dataset_list.py \
        --dataset_root "your/dataset/dir" --file_splitor " "

For each of the ``train``, ``val`` and ``test`` splits a file named
``<split>.list`` is written into the dataset root.  Every line holds an
image path and, when one exists, its label path, both relative to the
dataset root and joined by the chosen separator.
"""
import glob
import os.path
import argparse

# Sub-directory of the dataset root that holds each kind of file.
folder_name = {
    'image': 'leftImg8bit',
    'label': 'gtFine',
}

# File-name suffix (before the extension) that identifies each kind of file.
postfix = {
    'image': '_leftImg8bit',
    'label': '_gtFine_labelTrainIds',
}

# File extension (without the dot) of each kind of file.
data_format = {
    'image': 'png',
    'label': 'png',
}


def parse_args(argv=None):
    """Parse the command-line options of this script.

    Args:
        argv: Optional list of argument strings.  ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, which is what the
            original zero-argument call did.

    Returns:
        An ``argparse.Namespace`` with ``dataset_root`` and ``file_splitor``.
    """
    parser = argparse.ArgumentParser(
        description='PaddleSeg generate file list on cityscapes')
    # Required: every later step joins paths onto the root, so a missing
    # value should be a usage error rather than a TypeError further down.
    parser.add_argument(
        '--dataset_root',
        dest='dataset_root',
        help='dataset root directory',
        required=True,
        type=str)
    # Defaults to a single space so that omitting the flag no longer makes
    # the line concatenation fail on None.
    parser.add_argument(
        '--file_splitor',
        dest='file_splitor',
        help='file list splitor',
        default=' ',
        type=str)
    return parser.parse_args(argv)


def get_files(image_or_label, dataset_split, args):
    """Return the sorted paths of all images or labels in one split.

    Files are searched one directory level below
    ``<dataset_root>/<folder>/<dataset_split>/``.

    Args:
        image_or_label: ``'image'`` or ``'label'``; selects the folder,
            file-name suffix and extension from the module-level tables.
        dataset_split: Name of the split directory, e.g. ``'train'``.
        args: Object with a ``dataset_root`` attribute.

    Returns:
        A sorted list of matching paths, each prefixed by ``dataset_root``
        exactly as it was given.
    """
    dataset_root = args.dataset_root
    pattern = '*%s.%s' % (postfix[image_or_label], data_format[image_or_label])
    search_files = os.path.join(
        dataset_root, folder_name[image_or_label], dataset_split, '*', pattern)
    filenames = glob.glob(search_files)
    return sorted(filenames)


def _sample_key(path, image_or_label, dataset_split, dataset_root):
    """Return the part of *path* shared by an image and its label."""
    split_dir = os.path.join(
        dataset_root, folder_name[image_or_label], dataset_split)
    suffix = '%s.%s' % (postfix[image_or_label], data_format[image_or_label])
    relative = os.path.relpath(path, split_dir)
    # The glob pattern in get_files guarantees the path ends with the suffix.
    return relative[:-len(suffix)]


def generate_list(dataset_split, args):
    """Write ``<dataset_root>/<dataset_split>.list`` for one split.

    Each line is ``<image><separator><label>``, with both paths relative to
    the dataset root.  An image without a matching label produces a line
    holding only the image path.

    Args:
        dataset_split: Name of the split, e.g. ``'train'``.
        args: Object with ``dataset_root`` and ``file_splitor`` attributes.
            A ``file_splitor`` of ``None`` is treated as a single space.
    """
    dataset_root = args.dataset_root
    file_splitor = args.file_splitor
    if file_splitor is None:
        file_splitor = ' '

    image_files = get_files('image', dataset_split, args)
    # Match labels to images by name rather than by list position, so a
    # missing label cannot shift every later pair or cause an IndexError.
    labels_by_key = {
        _sample_key(path, 'label', dataset_split, dataset_root): path
        for path in get_files('label', dataset_split, args)
    }

    file_list = os.path.join(dataset_root, dataset_split + '.list')
    with open(file_list, "w") as f:
        for image_path in image_files:
            # relpath strips only the leading root; str.replace would also
            # remove any later occurrence of the root string in the path.
            fields = [os.path.relpath(image_path, dataset_root)]
            key = _sample_key(image_path, 'image', dataset_split, dataset_root)
            label_path = labels_by_key.get(key)
            if label_path is not None:
                fields.append(os.path.relpath(label_path, dataset_root))
            f.write(file_splitor.join(fields) + '\n')


if __name__ == '__main__':
    args = parse_args()
    for dataset_split in ['train', 'val', 'test']:
        generate_list(dataset_split, args)