Unverified · Commit df84bc6b authored by J Jason, committed by GitHub

Merge pull request #203 from SunAhong1993/syf_docs

add dataset path check
......@@ -46,7 +46,7 @@ def is_valid(sample):
return False
elif isinstance(s, np.ndarray) and s.size == 0:
return False
elif isinstance(s, collections.Sequence) and len(s) == 0:
elif isinstance(s, collections.abc.Sequence) and len(s) == 0:
return False
return True
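Note on the hunk above: the abstract base classes were moved to collections.abc in Python 3.3, and the old top-level aliases such as collections.Sequence were removed in Python 3.10, so the original check breaks on newer interpreters. A minimal sketch of a version-tolerant check (not part of this PR; the helper name is hypothetical):

    # Sketch: import Sequence from wherever it lives on this interpreter.
    try:
        from collections.abc import Sequence  # Python 3.3+
    except ImportError:
        from collections import Sequence      # older Python fallback

    def is_empty_sequence(s):
        # str is itself a Sequence; exclude it if only list/tuple-like values count.
        return isinstance(s, Sequence) and not isinstance(s, str) and len(s) == 0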
......@@ -55,6 +55,7 @@ def get_encoding(path):
f = open(path, 'rb')
data = f.read()
file_encoding = chardet.detect(data).get('encoding')
f.close()
return file_encoding
......
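The hunk above adds an explicit f.close() so the file handle is released after detection. An equivalent shape, shown here only as a sketch (assumes chardet is installed; this is not the PR's code), uses a context manager so the file is closed even if detection raises:

    import chardet

    def get_encoding(path):
        # The with-block closes the file on both normal exit and exceptions.
        with open(path, 'rb') as f:
            data = f.read()
        return chardet.detect(data).get('encoding')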
......@@ -18,6 +18,7 @@ import random
import copy
import json
import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .imagenet import ImageNet
from .dataset import is_pic
from .dataset import get_encoding
......@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet):
for line in f:
img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
json_file = path_normalization(json_file)
if not is_pic(img_file):
continue
if not osp.isfile(json_file):
......
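The two path_normalization calls above are the core of the dataset path check, and the same pattern recurs in the detection, segmentation, ImageNet, SegDataset, and VOC parsers below. A condensed sketch of the pattern as applied here (standalone function name and the malformed-line guard are assumptions, not code from the PR; path_normalization itself is defined later in this diff):

    import os.path as osp
    from paddlex.utils import path_normalization

    def parse_file_list(file_list, data_dir):
        samples = []
        with open(file_list) as f:
            for line in f:
                parts = line.strip().split()[:2]
                if len(parts) < 2:
                    continue  # assumption: skip malformed lines
                # Join first, then normalize separators, matching the PR's order.
                img_file, ann_file = [osp.join(data_dir, x) for x in parts]
                img_file = path_normalization(img_file)
                ann_file = path_normalization(ann_file)
                if not osp.isfile(ann_file):
                    continue  # annotation missing on disk: drop the sample
                samples.append((img_file, ann_file))
        return samples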
......@@ -20,6 +20,7 @@ import json
import cv2
import numpy as np
import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .voc import VOCDetection
from .dataset import is_pic
from .dataset import get_encoding
......@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection):
for line in f:
img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
json_file = path_normalization(json_file)
if not is_pic(img_file):
continue
if not osp.isfile(json_file):
......
......@@ -20,6 +20,7 @@ import json
import cv2
import numpy as np
import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import get_encoding
from .dataset import is_pic
......@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset):
for line in f:
img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
json_file = path_normalization(json_file)
if not is_pic(img_file):
continue
if not osp.isfile(json_file):
......
......@@ -17,6 +17,7 @@ import os.path as osp
import random
import copy
import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import is_pic
from .dataset import get_encoding
......@@ -66,6 +67,7 @@ class ImageNet(Dataset):
with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f:
items = line.strip().split()
items[0] = path_normalization(items[0])
if not is_pic(items[0]):
continue
full_path = osp.join(data_dir, items[0])
......
......@@ -17,6 +17,7 @@ import os.path as osp
import random
import copy
import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import get_encoding
from .dataset import is_pic
......@@ -61,10 +62,11 @@ class SegDataset(Dataset):
for line in f:
item = line.strip()
self.labels.append(item)
with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f:
items = line.strip().split()
items[0] = path_normalization(items[0])
items[1] = path_normalization(items[1])
if not is_pic(items[0]):
continue
full_path_im = osp.join(data_dir, items[0])
......
......@@ -22,6 +22,7 @@ import numpy as np
from collections import OrderedDict
import xml.etree.ElementTree as ET
import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset
from .dataset import is_pic
from .dataset import get_encoding
......@@ -92,6 +93,8 @@ class VOCDetection(Dataset):
break
img_file, xml_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
xml_file = path_normalization(xml_file)
if not is_pic(img_file):
continue
if not osp.isfile(xml_file):
......@@ -106,8 +109,11 @@ class VOCDetection(Dataset):
ct = int(tree.find('id').text)
im_id = np.array([int(tree.find('id').text)])
pattern = re.compile('<object>', re.IGNORECASE)
obj_tag = pattern.findall(
str(ET.tostringlist(tree.getroot())))[0][1:-1]
obj_match = pattern.findall(
str(ET.tostringlist(tree.getroot())))
if len(obj_match) == 0:
continue
obj_tag = obj_match[0][1:-1]
objs = tree.findall(obj_tag)
pattern = re.compile('<size>', re.IGNORECASE)
size_tag = pattern.findall(
......
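The hunk above guards the <object> lookup: the tag name is matched case-insensitively, and annotation files with no <object> element are now skipped instead of raising IndexError on the empty findall() result. A hedged standalone sketch of the same logic (function name is hypothetical):

    import re
    import xml.etree.ElementTree as ET

    def find_objects(xml_file):
        tree = ET.parse(xml_file)
        pattern = re.compile('<object>', re.IGNORECASE)
        obj_match = pattern.findall(str(ET.tostringlist(tree.getroot())))
        if len(obj_match) == 0:
            return None  # no labeled objects in this annotation file
        obj_tag = obj_match[0][1:-1]  # strip the surrounding angle brackets
        return tree.findall(obj_tag)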
......@@ -22,6 +22,7 @@ import shutil
import numpy as np
import PIL.ImageDraw
from .base import MyEncoder, is_pic, get_encoding
from paddlex.utils import path_normalization
class X2COCO(object):
......@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO):
image["height"] = json_info["imageHeight"]
image["width"] = json_info["imageWidth"]
image["id"] = image_id + 1
json_info["imagePath"] = path_normalization(json_info["imagePath"])
image["file_name"] = osp.split(json_info["imagePath"])[-1]
return image
......@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO):
image["height"] = img.shape[0]
image["width"] = img.shape[1]
image["id"] = image_id + 1
img_path = path_normalization(img_path)
image["file_name"] = osp.split(img_path)[-1]
return image
......@@ -268,6 +271,7 @@ class JingLing2COCO(X2COCO):
image["height"] = json_info["size"]["height"]
image["width"] = json_info["size"]["width"]
image["id"] = image_id + 1
json_info["path"] = path_normalization(json_info["path"])
image["file_name"] = osp.split(json_info["path"])[-1]
return image
......
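In the converter hunks above, the recorded image path is normalized before osp.split(...)[-1] extracts the file name. The assumed rationale: on Linux, os.path.split does not treat "\\" as a separator, so a Windows-style path stored in a LabelMe/EasyData/JingLing JSON would otherwise end up in file_name with its directory prefix intact. A small illustration (not code from the PR):

    import os.path as osp

    win_path = "images\\0001.jpg"
    print(osp.split(win_path)[-1])                     # 'images\\0001.jpg' on Linux
    print(osp.split(win_path.replace("\\", "/"))[-1])  # '0001.jpg'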
......@@ -17,6 +17,7 @@ from . import logging
from . import utils
from . import save
from .utils import seconds_to_hms
from .utils import path_normalization
from .download import download
from .download import decompress
from .download import download_and_decompress
......@@ -20,6 +20,7 @@ import numpy as np
import six
import yaml
import math
import platform
from . import logging
......@@ -49,18 +50,26 @@ def get_environ_info():
info['num'] = fluid.core.get_cuda_device_count()
return info
def path_normalization(path):
win_sep = "\\"
other_sep = "/"
if platform.system() == "Windows":
path = win_sep.join(path.split(other_sep))
else:
path = other_sep.join(path.split(win_sep))
return path
def parse_param_file(param_file, return_shape=True):
from paddle.fluid.proto.framework_pb2 import VarType
f = open(param_file, 'rb')
version = np.fromstring(f.read(4), dtype='int32')
lod_level = np.fromstring(f.read(8), dtype='int64')
version = np.frombuffer(f.read(4), dtype='int32')
lod_level = np.frombuffer(f.read(8), dtype='int64')
for i in range(int(lod_level)):
_size = np.fromstring(f.read(8), dtype='int64')
_size = np.frombuffer(f.read(8), dtype='int64')
_ = f.read(_size)
version = np.fromstring(f.read(4), dtype='int32')
version = np.frombuffer(f.read(4), dtype='int32')
tensor_desc = VarType.TensorDesc()
tensor_desc_size = np.fromstring(f.read(4), dtype='int32')
tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
tensor_shape = tuple(tensor_desc.dims)
if return_shape:
......
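The parse_param_file changes above swap np.fromstring for np.frombuffer: the binary mode of np.fromstring is deprecated in NumPy and emits a DeprecationWarning, while np.frombuffer reads the same raw bytes without copying. A tiny sketch of the equivalence being relied on (illustrative values only):

    import numpy as np

    raw = np.int32(7).tobytes() + np.int64(3).tobytes()
    version = np.frombuffer(raw[:4], dtype='int32')    # array([7], dtype=int32)
    lod_level = np.frombuffer(raw[4:], dtype='int64')  # array([3])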