# coding: utf8
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import glob
import os.path
import argparse


def parse_args():
    """Parse the command-line options of the file-list generator.

    Returns:
        argparse.Namespace with the attributes ``dataset_root``, ``type``,
        ``separator``, ``folder``, ``second_folder``, ``format`` and
        ``postfix``.
    """
    parser = argparse.ArgumentParser(
        description='PaddleSeg generate file list on cityscapes or your customized dataset.')
    parser.add_argument(
        'dataset_root',
        help='dataset root directory',
        type=str
    )
    parser.add_argument(
        '--type',
        help='dataset type: \n'
             '- cityscapes \n'
             '- custom(default)',
        default="custom",
        type=str
    )
    parser.add_argument(
        '--separator',
        dest='separator',
        help='file list separator',
        default="|",
        type=str
    )
    parser.add_argument(
        '--folder',
        help='the folder names of images and labels',
        type=str,
        nargs=2,
        default=['images', 'annotations']
    )
    parser.add_argument(
        '--second_folder',
        help='the second-level folder names of train set, validation set, test set',
        type=str,
        nargs='*',
        default=['train', 'val', 'test']
    )
    parser.add_argument(
        '--format',
        help='data format of images and labels, e.g. jpg or png.',
        type=str,
        nargs=2,
        default=['jpg', 'png']
    )
    parser.add_argument(
        '--postfix',
        help='postfix of images or labels',
        type=str,
        nargs=2,
        default=['', '']
    )

    return parser.parse_args()


def cityscape_cfg(args):
    """Overwrite ``postfix``, ``folder`` and ``format`` on *args* in place
    with the fixed values used for the ``cityscapes`` dataset type."""
    args.postfix = ['_leftImg8bit', '_gtFine_labelTrainIds']
    args.folder = ['leftImg8bit', 'gtFine']
    args.format = ['png', 'png']


def get_files(image_or_label, dataset_split, args):
    """Collect the image or label files belonging to one dataset split.

    Args:
        image_or_label: index into ``args.folder`` / ``args.postfix`` /
            ``args.format``; ``generate_list`` passes 0 for images and 1 for
            labels.
        dataset_split: name of the split sub-folder (e.g. ``'train'``).
        args: namespace providing ``dataset_root``, ``folder``, ``postfix``
            and ``format``.

    Returns:
        Sorted list of paths matching ``*<postfix>.<format>`` located directly
        in ``<dataset_root>/<folder>/<dataset_split>`` or one or two directory
        levels below it. Deeper nesting is not searched.
    """
    suffixes = args.postfix
    extensions = args.format  # local name avoids shadowing the builtin `format`

    pattern = '*%s.%s' % (suffixes[image_or_label], extensions[image_or_label])
    split_dir = os.path.join(args.dataset_root, args.folder[image_or_label],
                             dataset_split)

    filenames = []
    # Search the split directory itself, then its sub-directories, then the
    # third-level directories.
    for nested_dirs in ((), ('*',), ('*', '*')):
        search = os.path.join(split_dir, *(nested_dirs + (pattern,)))
        filenames.extend(glob.glob(search))

    return sorted(filenames)


def _relative_to_root(path, dataset_root):
    """Return *path* expressed relative to *dataset_root*."""
    # Only the leading root is removed; a plain str.replace() would also strip
    # any later occurrence of the root string (e.g. inside a file name).
    return os.path.relpath(path, dataset_root)


def generate_list(args):
    """Write one ``<split>.list`` file per entry of ``args.second_folder``.

    Each line has the form ``<image path><separator><label path>`` with both
    paths relative to ``args.dataset_root``. Images and labels are paired by
    their position in the sorted file lists. A split with no images or no
    labels is reported and skipped.

    Args:
        args: namespace providing ``dataset_root``, ``separator``,
            ``second_folder`` plus everything ``get_files`` reads.

    Raises:
        ValueError: if a split contains a different number of images and
            labels, since positional pairing would then be wrong.
    """
    dataset_root = args.dataset_root
    separator = args.separator

    for dataset_split in args.second_folder:
        print("Creating {}.list...".format(dataset_split))
        image_files = get_files(0, dataset_split, args)
        label_files = get_files(1, dataset_split, args)
        if not image_files:
            img_dir = os.path.join(dataset_root, args.folder[0], dataset_split)
            print("No files in {}".format(img_dir))
            continue
        elif not label_files:
            label_dir = os.path.join(dataset_root, args.folder[1], dataset_split)
            print("No files in {}".format(label_dir))
            continue

        # Checked before the output file is opened so that a mismatched split
        # never leaves a partially written list behind.
        if len(image_files) != len(label_files):
            raise ValueError(
                "Number of images = {}, number of labels = {} in '{}'. "
                "The number of images is not equal to the number of labels, "
                "please check your dataset!".format(
                    len(image_files), len(label_files), dataset_split))

        file_list = os.path.join(dataset_root, dataset_split + '.list')
        with open(file_list, "w") as f:
            for image_path, label_path in zip(image_files, label_files):
                left = _relative_to_root(image_path, dataset_root)
                right = _relative_to_root(label_path, dataset_root)
                line = left + separator + right + '\n'
                f.write(line)
                print(line)


if __name__ == '__main__':
    args = parse_args()
    if args.type == 'cityscapes':
        cityscape_cfg(args)
    generate_list(args)