提交 3f816875 编写于 作者: S SunAhong1993

add dataset path check

上级 a3465454
...@@ -55,6 +55,7 @@ def get_encoding(path): ...@@ -55,6 +55,7 @@ def get_encoding(path):
f = open(path, 'rb') f = open(path, 'rb')
data = f.read() data = f.read()
file_encoding = chardet.detect(data).get('encoding') file_encoding = chardet.detect(data).get('encoding')
f.close()
return file_encoding return file_encoding
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import os.path as osp import os.path as osp
import platform
import random import random
import copy import copy
import json import json
...@@ -64,10 +65,18 @@ class EasyDataCls(ImageNet): ...@@ -64,10 +65,18 @@ class EasyDataCls(ImageNet):
item = line.strip() item = line.strip()
self.labels.append(item) self.labels.append(item)
logging.info("Starting to read file list from dataset...") logging.info("Starting to read file list from dataset...")
win_sep = "\\"
other_sep = "/"
with open(file_list, encoding=get_encoding(file_list)) as f: with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f: for line in f:
img_file, json_file = [osp.join(data_dir, x) \ img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
if platform.system() == "Windows":
img_file = win_sep.join(img_file.split(other_sep))
json_file = win_sep.join(json_file.split(other_sep))
else:
img_file = other_sep.join(img_file.split(win_sep))
json_file = other_sep.join(json_file.split(win_sep))
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(json_file): if not osp.isfile(json_file):
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import os.path as osp import os.path as osp
import platform
import random import random
import copy import copy
import json import json
...@@ -83,10 +84,18 @@ class EasyDataDet(VOCDetection): ...@@ -83,10 +84,18 @@ class EasyDataDet(VOCDetection):
from pycocotools.mask import decode from pycocotools.mask import decode
ct = 0 ct = 0
ann_ct = 0 ann_ct = 0
win_sep = "\\"
other_sep = "/"
with open(file_list, encoding=get_encoding(file_list)) as f: with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f: for line in f:
img_file, json_file = [osp.join(data_dir, x) \ img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
if platform.system() == "Windows":
img_file = win_sep.join(img_file.split(other_sep))
json_file = win_sep.join(json_file.split(other_sep))
else:
img_file = other_sep.join(img_file.split(win_sep))
json_file = other_sep.join(json_file.split(win_sep))
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(json_file): if not osp.isfile(json_file):
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import os.path as osp import os.path as osp
import platform
import random import random
import copy import copy
import json import json
...@@ -61,6 +62,8 @@ class EasyDataSeg(Dataset): ...@@ -61,6 +62,8 @@ class EasyDataSeg(Dataset):
from pycocotools.mask import decode from pycocotools.mask import decode
cname2cid = {} cname2cid = {}
label_id = 0 label_id = 0
win_sep = "\\"
other_sep = "/"
with open(label_list, encoding=get_encoding(label_list)) as fr: with open(label_list, encoding=get_encoding(label_list)) as fr:
for line in fr.readlines(): for line in fr.readlines():
cname2cid[line.strip()] = label_id cname2cid[line.strip()] = label_id
...@@ -71,6 +74,12 @@ class EasyDataSeg(Dataset): ...@@ -71,6 +74,12 @@ class EasyDataSeg(Dataset):
for line in f: for line in f:
img_file, json_file = [osp.join(data_dir, x) \ img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
if platform.system() == "Windows":
img_file = win_sep.join(img_file.split(other_sep))
json_file = win_sep.join(json_file.split(other_sep))
else:
img_file = other_sep.join(img_file.split(win_sep))
json_file = other_sep.join(json_file.split(win_sep))
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(json_file): if not osp.isfile(json_file):
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import os.path as osp import os.path as osp
import platform
import random import random
import copy import copy
import paddlex.utils.logging as logging import paddlex.utils.logging as logging
...@@ -63,9 +64,15 @@ class ImageNet(Dataset): ...@@ -63,9 +64,15 @@ class ImageNet(Dataset):
item = line.strip() item = line.strip()
self.labels.append(item) self.labels.append(item)
logging.info("Starting to read file list from dataset...") logging.info("Starting to read file list from dataset...")
win_sep = "\\"
other_sep = "/"
with open(file_list, encoding=get_encoding(file_list)) as f: with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f: for line in f:
items = line.strip().split() items = line.strip().split()
if platform.system() == "Windows":
items[0] = win_sep.join(items[0].split(other_sep))
else:
items[0] = other_sep.join(items[0].split(win_sep))
if not is_pic(items[0]): if not is_pic(items[0]):
continue continue
full_path = osp.join(data_dir, items[0]) full_path = osp.join(data_dir, items[0])
......
...@@ -61,10 +61,17 @@ class SegDataset(Dataset): ...@@ -61,10 +61,17 @@ class SegDataset(Dataset):
for line in f: for line in f:
item = line.strip() item = line.strip()
self.labels.append(item) self.labels.append(item)
win_sep = "\\"
other_sep = "/"
with open(file_list, encoding=get_encoding(file_list)) as f: with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f: for line in f:
items = line.strip().split() items = line.strip().split()
if platform.system() == "Windows":
items[0] = win_sep.join(items[0].split(other_sep))
items[1] = win_sep.join(items[1].split(other_sep))
else:
items[0] = other_sep.join(items[0].split(win_sep))
items[1] = other_sep.join(items[1].split(win_sep))
if not is_pic(items[0]): if not is_pic(items[0]):
continue continue
full_path_im = osp.join(data_dir, items[0]) full_path_im = osp.join(data_dir, items[0])
......
...@@ -16,6 +16,7 @@ from __future__ import absolute_import ...@@ -16,6 +16,7 @@ from __future__ import absolute_import
import copy import copy
import os import os
import os.path as osp import os.path as osp
import platform
import random import random
import re import re
import numpy as np import numpy as np
...@@ -85,6 +86,8 @@ class VOCDetection(Dataset): ...@@ -85,6 +86,8 @@ class VOCDetection(Dataset):
}) })
ct = 0 ct = 0
ann_ct = 0 ann_ct = 0
win_sep = "\\"
other_sep = "/"
with open(file_list, 'r', encoding=get_encoding(file_list)) as fr: with open(file_list, 'r', encoding=get_encoding(file_list)) as fr:
while True: while True:
line = fr.readline() line = fr.readline()
...@@ -92,6 +95,12 @@ class VOCDetection(Dataset): ...@@ -92,6 +95,12 @@ class VOCDetection(Dataset):
break break
img_file, xml_file = [osp.join(data_dir, x) \ img_file, xml_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
if platform.system() == "Windows":
img_file = win_sep.join(img_file.split(other_sep))
xml_file = win_sep.join(xml_file.split(other_sep))
else:
img_file = other_sep.join(img_file.split(win_sep))
xml_file = other_sep.join(xml_file.split(win_sep))
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(xml_file): if not osp.isfile(xml_file):
...@@ -106,8 +115,11 @@ class VOCDetection(Dataset): ...@@ -106,8 +115,11 @@ class VOCDetection(Dataset):
ct = int(tree.find('id').text) ct = int(tree.find('id').text)
im_id = np.array([int(tree.find('id').text)]) im_id = np.array([int(tree.find('id').text)])
pattern = re.compile('<object>', re.IGNORECASE) pattern = re.compile('<object>', re.IGNORECASE)
obj_tag = pattern.findall( obj_match = pattern.findall(
str(ET.tostringlist(tree.getroot())))[0][1:-1] str(ET.tostringlist(tree.getroot())))
if len(obj_match) == 0:
continue
obj_tag = obj_match[0][1:-1]
objs = tree.findall(obj_tag) objs = tree.findall(obj_tag)
pattern = re.compile('<size>', re.IGNORECASE) pattern = re.compile('<size>', re.IGNORECASE)
size_tag = pattern.findall( size_tag = pattern.findall(
......
...@@ -18,6 +18,7 @@ import cv2 ...@@ -18,6 +18,7 @@ import cv2
import json import json
import os import os
import os.path as osp import os.path as osp
import platform
import shutil import shutil
import numpy as np import numpy as np
import PIL.ImageDraw import PIL.ImageDraw
...@@ -100,6 +101,12 @@ class LabelMe2COCO(X2COCO): ...@@ -100,6 +101,12 @@ class LabelMe2COCO(X2COCO):
image["height"] = json_info["imageHeight"] image["height"] = json_info["imageHeight"]
image["width"] = json_info["imageWidth"] image["width"] = json_info["imageWidth"]
image["id"] = image_id + 1 image["id"] = image_id + 1
win_sep = "\\"
other_sep = "/"
if platform.system() == "Windows":
json_info["imagePath"] = win_sep.join(json_info["imagePath"].split(other_sep))
else:
json_info["imagePath"] = other_sep.join(json_info["imagePath"].split(win_sep))
image["file_name"] = osp.split(json_info["imagePath"])[-1] image["file_name"] = osp.split(json_info["imagePath"])[-1]
return image return image
...@@ -187,6 +194,12 @@ class EasyData2COCO(X2COCO): ...@@ -187,6 +194,12 @@ class EasyData2COCO(X2COCO):
image["height"] = img.shape[0] image["height"] = img.shape[0]
image["width"] = img.shape[1] image["width"] = img.shape[1]
image["id"] = image_id + 1 image["id"] = image_id + 1
win_sep = "\\"
other_sep = "/"
if platform.system() == "Windows":
img_path = win_sep.join(img_path.split(other_sep))
else:
img_path = other_sep.join(img_path.split(win_sep))
image["file_name"] = osp.split(img_path)[-1] image["file_name"] = osp.split(img_path)[-1]
return image return image
...@@ -268,6 +281,12 @@ class JingLing2COCO(X2COCO): ...@@ -268,6 +281,12 @@ class JingLing2COCO(X2COCO):
image["height"] = json_info["size"]["height"] image["height"] = json_info["size"]["height"]
image["width"] = json_info["size"]["width"] image["width"] = json_info["size"]["width"]
image["id"] = image_id + 1 image["id"] = image_id + 1
win_sep = "\\"
other_sep = "/"
if platform.system() == "Windows":
json_info["path"] = win_sep.join(json_info["path"].split(other_sep))
else:
json_info["path"] = other_sep.join(json_info["path"].split(win_sep))
image["file_name"] = osp.split(json_info["path"])[-1] image["file_name"] = osp.split(json_info["path"])[-1]
return image return image
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册