From 341d390798708fe01f0d349d038c4901691afc75 Mon Sep 17 00:00:00 2001 From: LutaoChu <30695251+LutaoChu@users.noreply.github.com> Date: Thu, 19 Sep 2019 21:37:10 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E7=94=9F=E6=88=90=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E5=88=97=E8=A1=A8=E5=8A=9F=E8=83=BD=20(#41)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add create_dataset_list function * Create create_dataset_list.md * del .idea --- pdseg/tools/__init__.py | 14 ++++++ pdseg/tools/create_dataset_list.md | 5 ++ pdseg/tools/create_dataset_list.py | 73 ++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) create mode 100644 pdseg/tools/__init__.py create mode 100644 pdseg/tools/create_dataset_list.md create mode 100644 pdseg/tools/create_dataset_list.py diff --git a/pdseg/tools/__init__.py b/pdseg/tools/__init__.py new file mode 100644 index 00000000..a5914a21 --- /dev/null +++ b/pdseg/tools/__init__.py @@ -0,0 +1,14 @@ +# coding: utf8 +# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/pdseg/tools/create_dataset_list.md b/pdseg/tools/create_dataset_list.md
new file mode 100644
index 00000000..f1d211af
--- /dev/null
+++ b/pdseg/tools/create_dataset_list.md
@@ -0,0 +1,5 @@
+# 数据列表生成
+
+```
+python pdseg/tools/create_dataset_list.py --dataset_root "your/dataset/dir" --file_splitor " "
+```
diff --git a/pdseg/tools/create_dataset_list.py b/pdseg/tools/create_dataset_list.py
new file mode 100644
index 00000000..065416a3
--- /dev/null
+++ b/pdseg/tools/create_dataset_list.py
@@ -0,0 +1,105 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+"""Generate train/val/test file lists for a Cityscapes-style dataset."""
+import glob
+import os.path
+import argparse
+
+# Sub-directory of the dataset root that holds each kind of file.
+folder_name = {
+    'image': 'leftImg8bit',
+    'label': 'gtFine',
+}
+
+# File-name suffix (before the extension) of each kind of file.
+postfix = {
+    'image': '_leftImg8bit',
+    'label': '_gtFine_labelTrainIds',
+}
+
+# File extension of each kind of file.
+data_format = {
+    'image': 'png',
+    'label': 'png',
+}
+
+
+def parse_args():
+    """Parse the command-line options of the list generator.
+
+    Returns:
+        argparse.Namespace with ``dataset_root`` and ``file_splitor``.
+    """
+    parser = argparse.ArgumentParser(
+        description='PaddleSeg generate file list on cityscapes')
+    # Required: os.path.join cannot build the search path from a None
+    # root, so a missing option is reported by argparse instead.
+    parser.add_argument(
+        '--dataset_root',
+        dest='dataset_root',
+        help='dataset root directory',
+        required=True,
+        type=str)
+    # A None separator cannot be concatenated into the output lines, so
+    # the option defaults to a single space.
+    parser.add_argument(
+        '--file_splitor',
+        dest='file_splitor',
+        help='file list splitor',
+        default=' ',
+        type=str)
+    return parser.parse_args()
+
+
+def get_files(image_or_label, dataset_split, args):
+    """Return the sorted paths of one kind of file in one dataset split.
+
+    Args:
+        image_or_label: 'image' or 'label', a key of the module tables.
+        dataset_split: name of the split directory, e.g. 'train'.
+        args: parsed options; only ``dataset_root`` is read.
+    """
+    pattern = '*%s.%s' % (postfix[image_or_label], data_format[image_or_label])
+    # Files sit one directory below the split directory.
+    search_files = os.path.join(
+        args.dataset_root, folder_name[image_or_label], dataset_split, '*',
+        pattern)
+    return sorted(glob.glob(search_files))
+
+
+def generate_list(dataset_split, args):
+    """Write ``<dataset_root>/<dataset_split>.list`` for one split.
+
+    Each line holds an image path and its label path, both relative to
+    the dataset root and joined by ``args.file_splitor``.
+
+    Raises:
+        ValueError: if the image and label counts differ, because the two
+            sorted lists are paired by position.
+    """
+    dataset_root = args.dataset_root
+    file_splitor = args.file_splitor
+
+    image_files = get_files('image', dataset_split, args)
+    label_files = get_files('label', dataset_split, args)
+    # Checked before the list file is opened so a mismatch never leaves a
+    # truncated or mispaired list behind.
+    if len(image_files) != len(label_files):
+        raise ValueError(
+            'split %r: found %d images but %d labels' %
+            (dataset_split, len(image_files), len(label_files)))
+
+    file_list = os.path.join(dataset_root, dataset_split + '.list')
+    with open(file_list, "w") as f:
+        for image_file, label_file in zip(image_files, label_files):
+            # relpath removes only the leading root; str.replace would also
+            # delete any later occurrence of the root string in the path.
+            left = os.path.relpath(image_file, dataset_root)
+            right = os.path.relpath(label_file, dataset_root)
+            f.write(left + file_splitor + right + '\n')
+
+
+if __name__ == '__main__':
+    args = parse_args()
+    for dataset_split in ['train', 'val', 'test']:
+        generate_list(dataset_split, args)
-- 
GitLab