添加生成数据列表功能 (#41)

* add create_dataset_list function * Create create_dataset_list.md * del .idea

添加生成数据列表功能 (#41)
* add create_dataset_list function * Create create_dataset_list.md * del .idea
341d3907 · LutaoChu · Zeyu Chen · e41f07ce · 341d3907 · 341d3907
Showing with 92 additions and 0 deletions

pdseg/tools/__init__.py pdseg/tools/__init__.py +14 -0

pdseg/tools/create_dataset_list.md pdseg/tools/create_dataset_list.md +5 -0

pdseg/tools/create_dataset_list.py pdseg/tools/create_dataset_list.py +73 -0

未找到文件。
--- a/pdseg/tools/__init__.py
+++ b/pdseg/tools/__init__.py
+# coding: utf8
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/pdseg/tools/create_dataset_list.md
+++ b/pdseg/tools/create_dataset_list.md
+# 数据列表生成
+```
+python pdseg/tools/create_dataset_list.py --dataset_root "your/dataset/dir" --file_splitor " "
+```
--- a/pdseg/tools/create_dataset_list.py
+++ b/pdseg/tools/create_dataset_list.py
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+import glob
+import os.path
+import argparse
+# Directory, directly under the dataset root, that holds each kind of file.
+folder_name = dict(image='leftImg8bit', label='gtFine')
+
+# File-name suffix (placed right before the extension) for each kind of file.
+postfix = dict(image='_leftImg8bit', label='_gtFine_labelTrainIds')
+
+# File extension, without the leading dot, for each kind of file.
+data_format = dict(image='png', label='png')
+def parse_args():
+    """Parse the command-line options of the file-list generator.
+
+    Returns:
+        argparse.Namespace with two string attributes: ``dataset_root``
+        (mandatory) and ``file_splitor`` (defaults to a single space).
+    """
+    parser = argparse.ArgumentParser(
+        description='PaddleSeg generate file list on cityscapes')
+    # The root is joined into every search path and into the output path, so
+    # a missing value is rejected here with a usage message instead of
+    # failing later inside os.path.join(None, ...).
+    parser.add_argument(
+        '--dataset_root',
+        dest='dataset_root',
+        help='dataset root directory',
+        required=True,
+        type=str)
+    # The separator is concatenated between the two paths of every output
+    # line; a None default would make that concatenation raise TypeError.
+    parser.add_argument(
+        '--file_splitor',
+        dest='file_splitor',
+        help='file list splitor',
+        default=' ',
+        type=str)
+    return parser.parse_args()
+def get_files(image_or_label, dataset_split, args):
+    """Return the sorted paths of one kind of file inside one dataset split.
+
+    Args:
+        image_or_label: key into the module-level ``folder_name``,
+            ``postfix`` and ``data_format`` tables ('image' or 'label').
+        dataset_split: name of the split directory to search.
+        args: parsed options; only ``args.dataset_root`` is read.
+
+    Returns:
+        Sorted list of the paths matching
+        ``<dataset_root>/<folder>/<dataset_split>/*/*<postfix>.<format>``.
+    """
+    root = args.dataset_root
+    name_glob = '*%s.%s' % (postfix[image_or_label],
+                            data_format[image_or_label])
+    split_dir = os.path.join(root, folder_name[image_or_label], dataset_split)
+    # One wildcard for the directory level below the split, then the file.
+    matches = glob.glob(os.path.join(split_dir, '*', name_glob))
+    matches.sort()
+    return matches
+def generate_list(dataset_split, args):
+    """Write ``<dataset_root>/<dataset_split>.list`` pairing images and labels.
+
+    Every output line is ``<image path><file_splitor><label path>``, with
+    both paths relative to ``args.dataset_root``. Images and labels are
+    paired by position in their sorted listings.
+
+    Args:
+        dataset_split: split name; passed to ``get_files`` and used as the
+            stem of the output file name.
+        args: parsed options; ``dataset_root`` and ``file_splitor`` are read.
+
+    Raises:
+        ValueError: if the number of image files and label files differ,
+            because positional pairing would then be wrong or incomplete.
+    """
+    dataset_root = args.dataset_root
+    file_splitor = args.file_splitor
+    image_files = get_files('image', dataset_split, args)
+    label_files = get_files('label', dataset_split, args)
+    # Checked before the output file is opened so that a bad split does not
+    # leave a truncated list file behind.
+    if len(image_files) != len(label_files):
+        raise ValueError(
+            'split %r has %d image files but %d label files' %
+            (dataset_split, len(image_files), len(label_files)))
+    file_list = os.path.join(dataset_root, dataset_split + '.list')
+    with open(file_list, "w") as f:
+        for image_path, label_path in zip(image_files, label_files):
+            # relpath removes only the leading root. str.replace would also
+            # delete every later occurrence of the root text (a root of '.'
+            # would strip the dot in front of the file extension).
+            left = os.path.relpath(image_path, dataset_root)
+            right = os.path.relpath(label_path, dataset_root)
+            f.write(left + file_splitor + right + '\n')
+if __name__ == '__main__':
+    args = parse_args()
+    # One list file is produced per split, in this fixed order.
+    for split_name in ('train', 'val', 'test'):
+        generate_list(split_name, args)