Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleX
提交
df84bc6b
P
PaddleX
项目概览
PaddlePaddle
/
PaddleX
通知
138
Star
4
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
43
列表
看板
标记
里程碑
合并请求
5
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleX
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
43
Issue
43
列表
看板
标记
里程碑
合并请求
5
合并请求
5
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
df84bc6b
编写于
7月 10, 2020
作者:
J
Jason
提交者:
GitHub
7月 10, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #203 from SunAhong1993/syf_docs
add dataset path check
上级
d0bf6adb
e7afabeb
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
43 addition
and
9 deletion
+43
-9
paddlex/cv/datasets/dataset.py
paddlex/cv/datasets/dataset.py
+2
-1
paddlex/cv/datasets/easydata_cls.py
paddlex/cv/datasets/easydata_cls.py
+3
-0
paddlex/cv/datasets/easydata_det.py
paddlex/cv/datasets/easydata_det.py
+3
-0
paddlex/cv/datasets/easydata_seg.py
paddlex/cv/datasets/easydata_seg.py
+3
-0
paddlex/cv/datasets/imagenet.py
paddlex/cv/datasets/imagenet.py
+2
-0
paddlex/cv/datasets/seg_dataset.py
paddlex/cv/datasets/seg_dataset.py
+3
-1
paddlex/cv/datasets/voc.py
paddlex/cv/datasets/voc.py
+8
-2
paddlex/tools/x2coco.py
paddlex/tools/x2coco.py
+4
-0
paddlex/utils/__init__.py
paddlex/utils/__init__.py
+1
-0
paddlex/utils/utils.py
paddlex/utils/utils.py
+14
-5
未找到文件。
paddlex/cv/datasets/dataset.py
浏览文件 @
df84bc6b
...
...
@@ -46,7 +46,7 @@ def is_valid(sample):
return
False
elif
isinstance
(
s
,
np
.
ndarray
)
and
s
.
size
==
0
:
return
False
elif
isinstance
(
s
,
collections
.
Sequence
)
and
len
(
s
)
==
0
:
elif
isinstance
(
s
,
collections
.
abc
.
Sequence
)
and
len
(
s
)
==
0
:
return
False
return
True
...
...
@@ -55,6 +55,7 @@ def get_encoding(path):
f
=
open
(
path
,
'rb'
)
data
=
f
.
read
()
file_encoding
=
chardet
.
detect
(
data
).
get
(
'encoding'
)
f
.
close
()
return
file_encoding
...
...
paddlex/cv/datasets/easydata_cls.py
浏览文件 @
df84bc6b
...
...
@@ -18,6 +18,7 @@ import random
import
copy
import
json
import
paddlex.utils.logging
as
logging
from
paddlex.utils
import
path_normalization
from
.imagenet
import
ImageNet
from
.dataset
import
is_pic
from
.dataset
import
get_encoding
...
...
@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet):
for
line
in
f
:
img_file
,
json_file
=
[
osp
.
join
(
data_dir
,
x
)
\
for
x
in
line
.
strip
().
split
()[:
2
]]
img_file
=
path_normalization
(
img_file
)
json_file
=
path_normalization
(
json_file
)
if
not
is_pic
(
img_file
):
continue
if
not
osp
.
isfile
(
json_file
):
...
...
paddlex/cv/datasets/easydata_det.py
浏览文件 @
df84bc6b
...
...
@@ -20,6 +20,7 @@ import json
import
cv2
import
numpy
as
np
import
paddlex.utils.logging
as
logging
from
paddlex.utils
import
path_normalization
from
.voc
import
VOCDetection
from
.dataset
import
is_pic
from
.dataset
import
get_encoding
...
...
@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection):
for
line
in
f
:
img_file
,
json_file
=
[
osp
.
join
(
data_dir
,
x
)
\
for
x
in
line
.
strip
().
split
()[:
2
]]
img_file
=
path_normalization
(
img_file
)
json_file
=
path_normalization
(
json_file
)
if
not
is_pic
(
img_file
):
continue
if
not
osp
.
isfile
(
json_file
):
...
...
paddlex/cv/datasets/easydata_seg.py
浏览文件 @
df84bc6b
...
...
@@ -20,6 +20,7 @@ import json
import
cv2
import
numpy
as
np
import
paddlex.utils.logging
as
logging
from
paddlex.utils
import
path_normalization
from
.dataset
import
Dataset
from
.dataset
import
get_encoding
from
.dataset
import
is_pic
...
...
@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset):
for
line
in
f
:
img_file
,
json_file
=
[
osp
.
join
(
data_dir
,
x
)
\
for
x
in
line
.
strip
().
split
()[:
2
]]
img_file
=
path_normalization
(
img_file
)
json_file
=
path_normalization
(
json_file
)
if
not
is_pic
(
img_file
):
continue
if
not
osp
.
isfile
(
json_file
):
...
...
paddlex/cv/datasets/imagenet.py
浏览文件 @
df84bc6b
...
...
@@ -17,6 +17,7 @@ import os.path as osp
import
random
import
copy
import
paddlex.utils.logging
as
logging
from
paddlex.utils
import
path_normalization
from
.dataset
import
Dataset
from
.dataset
import
is_pic
from
.dataset
import
get_encoding
...
...
@@ -66,6 +67,7 @@ class ImageNet(Dataset):
with
open
(
file_list
,
encoding
=
get_encoding
(
file_list
))
as
f
:
for
line
in
f
:
items
=
line
.
strip
().
split
()
items
[
0
]
=
path_normalization
(
items
[
0
])
if
not
is_pic
(
items
[
0
]):
continue
full_path
=
osp
.
join
(
data_dir
,
items
[
0
])
...
...
paddlex/cv/datasets/seg_dataset.py
浏览文件 @
df84bc6b
...
...
@@ -17,6 +17,7 @@ import os.path as osp
import
random
import
copy
import
paddlex.utils.logging
as
logging
from
paddlex.utils
import
path_normalization
from
.dataset
import
Dataset
from
.dataset
import
get_encoding
from
.dataset
import
is_pic
...
...
@@ -61,10 +62,11 @@ class SegDataset(Dataset):
for
line
in
f
:
item
=
line
.
strip
()
self
.
labels
.
append
(
item
)
with
open
(
file_list
,
encoding
=
get_encoding
(
file_list
))
as
f
:
for
line
in
f
:
items
=
line
.
strip
().
split
()
items
[
0
]
=
path_normalization
(
items
[
0
])
items
[
1
]
=
path_normalization
(
items
[
1
])
if
not
is_pic
(
items
[
0
]):
continue
full_path_im
=
osp
.
join
(
data_dir
,
items
[
0
])
...
...
paddlex/cv/datasets/voc.py
浏览文件 @
df84bc6b
...
...
@@ -22,6 +22,7 @@ import numpy as np
from
collections
import
OrderedDict
import
xml.etree.ElementTree
as
ET
import
paddlex.utils.logging
as
logging
from
paddlex.utils
import
path_normalization
from
.dataset
import
Dataset
from
.dataset
import
is_pic
from
.dataset
import
get_encoding
...
...
@@ -92,6 +93,8 @@ class VOCDetection(Dataset):
break
img_file
,
xml_file
=
[
osp
.
join
(
data_dir
,
x
)
\
for
x
in
line
.
strip
().
split
()[:
2
]]
img_file
=
path_normalization
(
img_file
)
xml_file
=
path_normalization
(
xml_file
)
if
not
is_pic
(
img_file
):
continue
if
not
osp
.
isfile
(
xml_file
):
...
...
@@ -106,8 +109,11 @@ class VOCDetection(Dataset):
ct
=
int
(
tree
.
find
(
'id'
).
text
)
im_id
=
np
.
array
([
int
(
tree
.
find
(
'id'
).
text
)])
pattern
=
re
.
compile
(
'<object>'
,
re
.
IGNORECASE
)
obj_tag
=
pattern
.
findall
(
str
(
ET
.
tostringlist
(
tree
.
getroot
())))[
0
][
1
:
-
1
]
obj_match
=
pattern
.
findall
(
str
(
ET
.
tostringlist
(
tree
.
getroot
())))
if
len
(
obj_match
)
==
0
:
continue
obj_tag
=
obj_match
[
0
][
1
:
-
1
]
objs
=
tree
.
findall
(
obj_tag
)
pattern
=
re
.
compile
(
'<size>'
,
re
.
IGNORECASE
)
size_tag
=
pattern
.
findall
(
...
...
paddlex/tools/x2coco.py
浏览文件 @
df84bc6b
...
...
@@ -22,6 +22,7 @@ import shutil
import
numpy
as
np
import
PIL.ImageDraw
from
.base
import
MyEncoder
,
is_pic
,
get_encoding
from
paddlex.utils
import
path_normalization
class
X2COCO
(
object
):
...
...
@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO):
image
[
"height"
]
=
json_info
[
"imageHeight"
]
image
[
"width"
]
=
json_info
[
"imageWidth"
]
image
[
"id"
]
=
image_id
+
1
json_info
[
"imagePath"
]
=
path_normalization
(
json_info
[
"imagePath"
])
image
[
"file_name"
]
=
osp
.
split
(
json_info
[
"imagePath"
])[
-
1
]
return
image
...
...
@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO):
image
[
"height"
]
=
img
.
shape
[
0
]
image
[
"width"
]
=
img
.
shape
[
1
]
image
[
"id"
]
=
image_id
+
1
img_path
=
path_normalization
(
img_path
)
image
[
"file_name"
]
=
osp
.
split
(
img_path
)[
-
1
]
return
image
...
...
@@ -268,6 +271,7 @@ class JingLing2COCO(X2COCO):
image
[
"height"
]
=
json_info
[
"size"
][
"height"
]
image
[
"width"
]
=
json_info
[
"size"
][
"width"
]
image
[
"id"
]
=
image_id
+
1
json_info
[
"path"
]
=
path_normalization
(
json_info
[
"path"
])
image
[
"file_name"
]
=
osp
.
split
(
json_info
[
"path"
])[
-
1
]
return
image
...
...
paddlex/utils/__init__.py
浏览文件 @
df84bc6b
...
...
@@ -17,6 +17,7 @@ from . import logging
from
.
import
utils
from
.
import
save
from
.utils
import
seconds_to_hms
from
.utils
import
path_normalization
from
.download
import
download
from
.download
import
decompress
from
.download
import
download_and_decompress
paddlex/utils/utils.py
浏览文件 @
df84bc6b
...
...
@@ -20,6 +20,7 @@ import numpy as np
import
six
import
yaml
import
math
import
platform
from
.
import
logging
...
...
@@ -49,18 +50,26 @@ def get_environ_info():
info
[
'num'
]
=
fluid
.
core
.
get_cuda_device_count
()
return
info
def
path_normalization
(
path
):
win_sep
=
"
\\
"
other_sep
=
"/"
if
platform
.
system
()
==
"Windows"
:
path
=
win_sep
.
join
(
path
.
split
(
other_sep
))
else
:
path
=
other_sep
.
join
(
path
.
split
(
win_sep
))
return
path
def
parse_param_file
(
param_file
,
return_shape
=
True
):
from
paddle.fluid.proto.framework_pb2
import
VarType
f
=
open
(
param_file
,
'rb'
)
version
=
np
.
from
string
(
f
.
read
(
4
),
dtype
=
'int32'
)
lod_level
=
np
.
from
string
(
f
.
read
(
8
),
dtype
=
'int64'
)
version
=
np
.
from
buffer
(
f
.
read
(
4
),
dtype
=
'int32'
)
lod_level
=
np
.
from
buffer
(
f
.
read
(
8
),
dtype
=
'int64'
)
for
i
in
range
(
int
(
lod_level
)):
_size
=
np
.
from
string
(
f
.
read
(
8
),
dtype
=
'int64'
)
_size
=
np
.
from
buffer
(
f
.
read
(
8
),
dtype
=
'int64'
)
_
=
f
.
read
(
_size
)
version
=
np
.
from
string
(
f
.
read
(
4
),
dtype
=
'int32'
)
version
=
np
.
from
buffer
(
f
.
read
(
4
),
dtype
=
'int32'
)
tensor_desc
=
VarType
.
TensorDesc
()
tensor_desc_size
=
np
.
from
string
(
f
.
read
(
4
),
dtype
=
'int32'
)
tensor_desc_size
=
np
.
from
buffer
(
f
.
read
(
4
),
dtype
=
'int32'
)
tensor_desc
.
ParseFromString
(
f
.
read
(
int
(
tensor_desc_size
)))
tensor_shape
=
tuple
(
tensor_desc
.
dims
)
if
return_shape
:
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录