提交 ce2f79f7 编写于 作者: L liuhui29

add tools/create_label_list.py.

上级 ee780976
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import glob
import json
import os
import os.path as osp
import sys
import shutil
import xml.etree.ElementTree as ET
from tqdm import tqdm
import re
def main():
"""
create label_list.txt from dataset, support voc and coco dataset_type.
# create_lable_list from voc dataset
python tools/create_label_list.py --dataset_type voc --xml_dir path_to_xml_dir
# create_lable_list from coco dataset
python tools/create_label_list.py --dataset_type coco --json_path path_to_json_path
"""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--dataset_type', help='the type of dataset')
parser.add_argument('--xml_dir', help='xml directory')
parser.add_argument('--json_path', help='json_path')
parser.add_argument(
'--outfile_path',
help='output file path',
default='output/label_list.txt')
args = parser.parse_args()
if args.dataset_type == 'coco' or args.dataset_type == 'COCO':
assert args.dataset_type and args.json_path
try:
assert os.path.exists(args.json_path)
except AssertionError as e:
print('The json file: {} does not exist!'.format(args.json_path))
os._exit(0)
anno = json.load(open(args.json_path))
categories_list = [cat['name'] for cat in anno['categories']]
with open(args.outfile_path, 'w') as f:
for cat in categories_list[0:-1]:
f.write(cat + '\n')
f.write(categories_list[-1])
print('lable_list file: {} create done!'.format(args.outfile_path))
if args.dataset_type == 'voc' or args.dataset_type == 'VOC':
assert args.dataset_type and args.xml_dir
try:
assert os.path.exists(args.xml_dir)
except AssertionError as e:
print('The xml_dir: {} does not exist!'.format(args.xml_dir))
os._exit(0)
categories_list = set()
for xml_file in tqdm(glob.glob(osp.join(args.xml_dir, '*.xml'))):
# Read annotation xml
ann_tree = ET.parse(xml_file)
ann_root = ann_tree.getroot()
for obj in ann_root.findall('object'):
cat_name = obj.findtext('name')
categories_list.add(cat_name)
categories_list = sorted(categories_list)
with open(args.outfile_path, 'w') as f:
for cat in categories_list[0:-1]:
f.write(cat + '\n')
f.write(categories_list[-1])
print('lable_list file: {} create done!'.format(args.outfile_path))
if __name__ == '__main__':
main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册