未验证 提交 df84bc6b 编写于 作者: J Jason 提交者: GitHub

Merge pull request #203 from SunAhong1993/syf_docs

add dataset path check
...@@ -46,7 +46,7 @@ def is_valid(sample): ...@@ -46,7 +46,7 @@ def is_valid(sample):
return False return False
elif isinstance(s, np.ndarray) and s.size == 0: elif isinstance(s, np.ndarray) and s.size == 0:
return False return False
elif isinstance(s, collections.Sequence) and len(s) == 0: elif isinstance(s, collections.abc.Sequence) and len(s) == 0:
return False return False
return True return True
...@@ -55,6 +55,7 @@ def get_encoding(path): ...@@ -55,6 +55,7 @@ def get_encoding(path):
f = open(path, 'rb') f = open(path, 'rb')
data = f.read() data = f.read()
file_encoding = chardet.detect(data).get('encoding') file_encoding = chardet.detect(data).get('encoding')
f.close()
return file_encoding return file_encoding
......
...@@ -18,6 +18,7 @@ import random ...@@ -18,6 +18,7 @@ import random
import copy import copy
import json import json
import paddlex.utils.logging as logging import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .imagenet import ImageNet from .imagenet import ImageNet
from .dataset import is_pic from .dataset import is_pic
from .dataset import get_encoding from .dataset import get_encoding
...@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet): ...@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet):
for line in f: for line in f:
img_file, json_file = [osp.join(data_dir, x) \ img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
json_file = path_normalization(json_file)
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(json_file): if not osp.isfile(json_file):
......
...@@ -20,6 +20,7 @@ import json ...@@ -20,6 +20,7 @@ import json
import cv2 import cv2
import numpy as np import numpy as np
import paddlex.utils.logging as logging import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .voc import VOCDetection from .voc import VOCDetection
from .dataset import is_pic from .dataset import is_pic
from .dataset import get_encoding from .dataset import get_encoding
...@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection): ...@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection):
for line in f: for line in f:
img_file, json_file = [osp.join(data_dir, x) \ img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
json_file = path_normalization(json_file)
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(json_file): if not osp.isfile(json_file):
......
...@@ -20,6 +20,7 @@ import json ...@@ -20,6 +20,7 @@ import json
import cv2 import cv2
import numpy as np import numpy as np
import paddlex.utils.logging as logging import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset from .dataset import Dataset
from .dataset import get_encoding from .dataset import get_encoding
from .dataset import is_pic from .dataset import is_pic
...@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset): ...@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset):
for line in f: for line in f:
img_file, json_file = [osp.join(data_dir, x) \ img_file, json_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
json_file = path_normalization(json_file)
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(json_file): if not osp.isfile(json_file):
......
...@@ -17,6 +17,7 @@ import os.path as osp ...@@ -17,6 +17,7 @@ import os.path as osp
import random import random
import copy import copy
import paddlex.utils.logging as logging import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset from .dataset import Dataset
from .dataset import is_pic from .dataset import is_pic
from .dataset import get_encoding from .dataset import get_encoding
...@@ -66,6 +67,7 @@ class ImageNet(Dataset): ...@@ -66,6 +67,7 @@ class ImageNet(Dataset):
with open(file_list, encoding=get_encoding(file_list)) as f: with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f: for line in f:
items = line.strip().split() items = line.strip().split()
items[0] = path_normalization(items[0])
if not is_pic(items[0]): if not is_pic(items[0]):
continue continue
full_path = osp.join(data_dir, items[0]) full_path = osp.join(data_dir, items[0])
......
...@@ -17,6 +17,7 @@ import os.path as osp ...@@ -17,6 +17,7 @@ import os.path as osp
import random import random
import copy import copy
import paddlex.utils.logging as logging import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset from .dataset import Dataset
from .dataset import get_encoding from .dataset import get_encoding
from .dataset import is_pic from .dataset import is_pic
...@@ -61,10 +62,11 @@ class SegDataset(Dataset): ...@@ -61,10 +62,11 @@ class SegDataset(Dataset):
for line in f: for line in f:
item = line.strip() item = line.strip()
self.labels.append(item) self.labels.append(item)
with open(file_list, encoding=get_encoding(file_list)) as f: with open(file_list, encoding=get_encoding(file_list)) as f:
for line in f: for line in f:
items = line.strip().split() items = line.strip().split()
items[0] = path_normalization(items[0])
items[1] = path_normalization(items[1])
if not is_pic(items[0]): if not is_pic(items[0]):
continue continue
full_path_im = osp.join(data_dir, items[0]) full_path_im = osp.join(data_dir, items[0])
......
...@@ -22,6 +22,7 @@ import numpy as np ...@@ -22,6 +22,7 @@ import numpy as np
from collections import OrderedDict from collections import OrderedDict
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
import paddlex.utils.logging as logging import paddlex.utils.logging as logging
from paddlex.utils import path_normalization
from .dataset import Dataset from .dataset import Dataset
from .dataset import is_pic from .dataset import is_pic
from .dataset import get_encoding from .dataset import get_encoding
...@@ -92,6 +93,8 @@ class VOCDetection(Dataset): ...@@ -92,6 +93,8 @@ class VOCDetection(Dataset):
break break
img_file, xml_file = [osp.join(data_dir, x) \ img_file, xml_file = [osp.join(data_dir, x) \
for x in line.strip().split()[:2]] for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)
xml_file = path_normalization(xml_file)
if not is_pic(img_file): if not is_pic(img_file):
continue continue
if not osp.isfile(xml_file): if not osp.isfile(xml_file):
...@@ -106,8 +109,11 @@ class VOCDetection(Dataset): ...@@ -106,8 +109,11 @@ class VOCDetection(Dataset):
ct = int(tree.find('id').text) ct = int(tree.find('id').text)
im_id = np.array([int(tree.find('id').text)]) im_id = np.array([int(tree.find('id').text)])
pattern = re.compile('<object>', re.IGNORECASE) pattern = re.compile('<object>', re.IGNORECASE)
obj_tag = pattern.findall( obj_match = pattern.findall(
str(ET.tostringlist(tree.getroot())))[0][1:-1] str(ET.tostringlist(tree.getroot())))
if len(obj_match) == 0:
continue
obj_tag = obj_match[0][1:-1]
objs = tree.findall(obj_tag) objs = tree.findall(obj_tag)
pattern = re.compile('<size>', re.IGNORECASE) pattern = re.compile('<size>', re.IGNORECASE)
size_tag = pattern.findall( size_tag = pattern.findall(
......
...@@ -22,6 +22,7 @@ import shutil ...@@ -22,6 +22,7 @@ import shutil
import numpy as np import numpy as np
import PIL.ImageDraw import PIL.ImageDraw
from .base import MyEncoder, is_pic, get_encoding from .base import MyEncoder, is_pic, get_encoding
from paddlex.utils import path_normalization
class X2COCO(object): class X2COCO(object):
...@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO): ...@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO):
image["height"] = json_info["imageHeight"] image["height"] = json_info["imageHeight"]
image["width"] = json_info["imageWidth"] image["width"] = json_info["imageWidth"]
image["id"] = image_id + 1 image["id"] = image_id + 1
json_info["imagePath"] = path_normalization(json_info["imagePath"])
image["file_name"] = osp.split(json_info["imagePath"])[-1] image["file_name"] = osp.split(json_info["imagePath"])[-1]
return image return image
...@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO): ...@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO):
image["height"] = img.shape[0] image["height"] = img.shape[0]
image["width"] = img.shape[1] image["width"] = img.shape[1]
image["id"] = image_id + 1 image["id"] = image_id + 1
img_path = path_normalization(img_path)
image["file_name"] = osp.split(img_path)[-1] image["file_name"] = osp.split(img_path)[-1]
return image return image
...@@ -268,6 +271,7 @@ class JingLing2COCO(X2COCO): ...@@ -268,6 +271,7 @@ class JingLing2COCO(X2COCO):
image["height"] = json_info["size"]["height"] image["height"] = json_info["size"]["height"]
image["width"] = json_info["size"]["width"] image["width"] = json_info["size"]["width"]
image["id"] = image_id + 1 image["id"] = image_id + 1
json_info["path"] = path_normalization(json_info["path"])
image["file_name"] = osp.split(json_info["path"])[-1] image["file_name"] = osp.split(json_info["path"])[-1]
return image return image
......
...@@ -17,6 +17,7 @@ from . import logging ...@@ -17,6 +17,7 @@ from . import logging
from . import utils from . import utils
from . import save from . import save
from .utils import seconds_to_hms from .utils import seconds_to_hms
from .utils import path_normalization
from .download import download from .download import download
from .download import decompress from .download import decompress
from .download import download_and_decompress from .download import download_and_decompress
...@@ -20,6 +20,7 @@ import numpy as np ...@@ -20,6 +20,7 @@ import numpy as np
import six import six
import yaml import yaml
import math import math
import platform
from . import logging from . import logging
...@@ -49,18 +50,26 @@ def get_environ_info(): ...@@ -49,18 +50,26 @@ def get_environ_info():
info['num'] = fluid.core.get_cuda_device_count() info['num'] = fluid.core.get_cuda_device_count()
return info return info
def path_normalization(path):
    """Rewrite the separators in *path* to match the current platform.

    On Windows every forward slash becomes a backslash; on any other
    OS every backslash becomes a forward slash. The path itself is not
    resolved or validated — only separator characters change.

    Args:
        path (str): A file path possibly written with the other
            platform's separator.

    Returns:
        str: The path using the host platform's separator convention.
    """
    # str.replace is equivalent to the split/join round-trip for
    # single-character separators.
    if platform.system() == "Windows":
        return path.replace("/", "\\")
    return path.replace("\\", "/")
def parse_param_file(param_file, return_shape=True): def parse_param_file(param_file, return_shape=True):
from paddle.fluid.proto.framework_pb2 import VarType from paddle.fluid.proto.framework_pb2 import VarType
f = open(param_file, 'rb') f = open(param_file, 'rb')
version = np.fromstring(f.read(4), dtype='int32') version = np.frombuffer(f.read(4), dtype='int32')
lod_level = np.fromstring(f.read(8), dtype='int64') lod_level = np.frombuffer(f.read(8), dtype='int64')
for i in range(int(lod_level)): for i in range(int(lod_level)):
_size = np.fromstring(f.read(8), dtype='int64') _size = np.frombuffer(f.read(8), dtype='int64')
_ = f.read(_size) _ = f.read(_size)
version = np.fromstring(f.read(4), dtype='int32') version = np.frombuffer(f.read(4), dtype='int32')
tensor_desc = VarType.TensorDesc() tensor_desc = VarType.TensorDesc()
tensor_desc_size = np.fromstring(f.read(4), dtype='int32') tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
tensor_desc.ParseFromString(f.read(int(tensor_desc_size))) tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
tensor_shape = tuple(tensor_desc.dims) tensor_shape = tuple(tensor_desc.dims)
if return_shape: if return_shape:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册