Commit df84bc6b (unverified)
Authored by Jason on Jul 10, 2020; committed by GitHub on Jul 10, 2020

Merge pull request #203 from SunAhong1993/syf_docs

add dataset path check

Parents: d0bf6adb, e7afabeb
Showing 10 changed files with 43 additions and 9 deletions (+43 -9).
paddlex/cv/datasets/dataset.py        +2  -1
paddlex/cv/datasets/easydata_cls.py   +3  -0
paddlex/cv/datasets/easydata_det.py   +3  -0
paddlex/cv/datasets/easydata_seg.py   +3  -0
paddlex/cv/datasets/imagenet.py       +2  -0
paddlex/cv/datasets/seg_dataset.py    +3  -1
paddlex/cv/datasets/voc.py            +8  -2
paddlex/tools/x2coco.py               +4  -0
paddlex/utils/__init__.py             +1  -0
paddlex/utils/utils.py                +14 -5
paddlex/cv/datasets/dataset.py

@@ -46,7 +46,7 @@ def is_valid(sample):
                 return False
             elif isinstance(s, np.ndarray) and s.size == 0:
                 return False
-            elif isinstance(s, collections.Sequence) and len(s) == 0:
+            elif isinstance(s, collections.abc.Sequence) and len(s) == 0:
                 return False
     return True

@@ -55,6 +55,7 @@ def get_encoding(path):
     f = open(path, 'rb')
     data = f.read()
     file_encoding = chardet.detect(data).get('encoding')
+    f.close()
     return file_encoding
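The two fixes above are small but meaningful: collections.Sequence is a deprecated alias that was removed in Python 3.10, so the collections.abc.Sequence spelling keeps is_valid working on newer interpreters, and the added f.close() stops get_encoding from leaking a file handle on every call. A minimal sketch of the Sequence check, illustrative only and not part of the commit:

import collections.abc
import numpy as np

# Lists are abc Sequences, so the len() == 0 branch applies to them.
print(isinstance([], collections.abc.Sequence))                # True
# ndarrays are not Sequences; empty arrays are caught by the s.size == 0 branch instead.
print(isinstance(np.zeros((0,)), collections.abc.Sequence))    # False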
paddlex/cv/datasets/easydata_cls.py

@@ -18,6 +18,7 @@ import random
 import copy
 import json
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .imagenet import ImageNet
 from .dataset import is_pic
 from .dataset import get_encoding

@@ -68,6 +69,8 @@ class EasyDataCls(ImageNet):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):
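The same pattern repeats in the detection and segmentation loaders below: each line of file_list names an image and its annotation, both paths are normalized to the platform's native separator, and non-image or missing files are skipped instead of failing later. A hedged sketch of how one list line flows through these checks, assuming PaddleX at this commit is importable; the list line and data_dir are made up for illustration:

import os.path as osp
from paddlex.utils import path_normalization          # helper added by this PR
from paddlex.cv.datasets.dataset import is_pic

line = "images\\0001.jpg annotations\\0001.json\n"    # hypothetical line with Windows-style separators
data_dir = "/data/easydata_cls"                        # hypothetical dataset root

img_file, json_file = [osp.join(data_dir, x) for x in line.strip().split()[:2]]
img_file = path_normalization(img_file)                # "\" becomes "/" on Linux, and vice versa on Windows
json_file = path_normalization(json_file)

if not is_pic(img_file):                               # drop lines that do not point at an image
    print("skipped: not an image")
elif not osp.isfile(json_file):                        # drop samples whose annotation file is missing
    print("skipped: missing annotation")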
paddlex/cv/datasets/easydata_det.py

@@ -20,6 +20,7 @@ import json
 import cv2
 import numpy as np
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .voc import VOCDetection
 from .dataset import is_pic
 from .dataset import get_encoding

@@ -87,6 +88,8 @@ class EasyDataDet(VOCDetection):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):
paddlex/cv/datasets/easydata_seg.py

@@ -20,6 +20,7 @@ import json
 import cv2
 import numpy as np
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import get_encoding
 from .dataset import is_pic

@@ -71,6 +72,8 @@ class EasyDataSeg(Dataset):
             for line in f:
                 img_file, json_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                json_file = path_normalization(json_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(json_file):
paddlex/cv/datasets/imagenet.py

@@ -17,6 +17,7 @@ import os.path as osp
 import random
 import copy
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import is_pic
 from .dataset import get_encoding

@@ -66,6 +67,7 @@ class ImageNet(Dataset):
         with open(file_list, encoding=get_encoding(file_list)) as f:
             for line in f:
                 items = line.strip().split()
+                items[0] = path_normalization(items[0])
                 if not is_pic(items[0]):
                     continue
                 full_path = osp.join(data_dir, items[0])
paddlex/cv/datasets/seg_dataset.py

@@ -17,6 +17,7 @@ import os.path as osp
 import random
 import copy
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import get_encoding
 from .dataset import is_pic

@@ -61,10 +62,11 @@ class SegDataset(Dataset):
             for line in f:
                 item = line.strip()
                 self.labels.append(item)
         with open(file_list, encoding=get_encoding(file_list)) as f:
             for line in f:
                 items = line.strip().split()
+                items[0] = path_normalization(items[0])
+                items[1] = path_normalization(items[1])
                 if not is_pic(items[0]):
                     continue
                 full_path_im = osp.join(data_dir, items[0])
paddlex/cv/datasets/voc.py

@@ -22,6 +22,7 @@ import numpy as np
 from collections import OrderedDict
 import xml.etree.ElementTree as ET
 import paddlex.utils.logging as logging
+from paddlex.utils import path_normalization
 from .dataset import Dataset
 from .dataset import is_pic
 from .dataset import get_encoding

@@ -92,6 +93,8 @@ class VOCDetection(Dataset):
                     break
                 img_file, xml_file = [osp.join(data_dir, x) \
                         for x in line.strip().split()[:2]]
+                img_file = path_normalization(img_file)
+                xml_file = path_normalization(xml_file)
                 if not is_pic(img_file):
                     continue
                 if not osp.isfile(xml_file):

@@ -106,8 +109,11 @@ class VOCDetection(Dataset):
                 ct = int(tree.find('id').text)
                 im_id = np.array([int(tree.find('id').text)])
                 pattern = re.compile('<object>', re.IGNORECASE)
-                obj_tag = pattern.findall(
-                    str(ET.tostringlist(tree.getroot())))[0][1:-1]
+                obj_match = pattern.findall(
+                    str(ET.tostringlist(tree.getroot())))
+                if len(obj_match) == 0:
+                    continue
+                obj_tag = obj_match[0][1:-1]
                 objs = tree.findall(obj_tag)
                 pattern = re.compile('<size>', re.IGNORECASE)
                 size_tag = pattern.findall(
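The third hunk is the real "dataset path check" hardening for VOC-style annotations: the case-insensitive search for an <object> tag used to index [0] unconditionally and crashed on XML files that contain no objects, whereas the new code skips such files. A minimal, stdlib-only sketch of the guard on a standalone XML string (illustrative; it mirrors the pattern in the diff above):

import re
import xml.etree.ElementTree as ET

xml_text = "<annotation><size><width>10</width></size></annotation>"  # no <object> entries
tree = ET.ElementTree(ET.fromstring(xml_text))

pattern = re.compile('<object>', re.IGNORECASE)
obj_match = pattern.findall(str(ET.tostringlist(tree.getroot())))
if len(obj_match) == 0:
    print("no objects, skip this annotation")   # the old code raised IndexError here
else:
    obj_tag = obj_match[0][1:-1]                # strip the angle brackets, e.g. "object"
    objs = tree.findall(obj_tag)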
paddlex/tools/x2coco.py

@@ -22,6 +22,7 @@ import shutil
 import numpy as np
 import PIL.ImageDraw
 from .base import MyEncoder, is_pic, get_encoding
+from paddlex.utils import path_normalization


 class X2COCO(object):

@@ -100,6 +101,7 @@ class LabelMe2COCO(X2COCO):
         image["height"] = json_info["imageHeight"]
         image["width"] = json_info["imageWidth"]
         image["id"] = image_id + 1
+        json_info["imagePath"] = path_normalization(json_info["imagePath"])
         image["file_name"] = osp.split(json_info["imagePath"])[-1]
         return image

@@ -187,6 +189,7 @@ class EasyData2COCO(X2COCO):
         image["height"] = img.shape[0]
         image["width"] = img.shape[1]
         image["id"] = image_id + 1
+        img_path = path_normalization(img_path)
         image["file_name"] = osp.split(img_path)[-1]
         return image

@@ -268,6 +271,7 @@ class JingLing2COCO(X2COCO):
         image["height"] = json_info["size"]["height"]
         image["width"] = json_info["size"]["width"]
         image["id"] = image_id + 1
+        json_info["path"] = path_normalization(json_info["path"])
         image["file_name"] = osp.split(json_info["path"])[-1]
         return image
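The converters normalize the recorded path before calling osp.split because on Linux os.path treats a backslash as an ordinary character, so a Windows-style imagePath coming out of a LabelMe, EasyData, or JingLing export would otherwise yield the whole string instead of just the file name. A small sketch of the failure mode and the fix, assuming a Linux/macOS host and PaddleX at this commit; the path is made up:

import os.path as osp
from paddlex.utils import path_normalization        # available after this commit

image_path = "imgs\\0001.jpg"                        # hypothetical path written by a Windows labeling tool

print(osp.split(image_path)[-1])                     # prints imgs\0001.jpg -- backslash is not a separator on POSIX
print(osp.split(path_normalization(image_path))[-1]) # prints 0001.jpg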
paddlex/utils/__init__.py

@@ -17,6 +17,7 @@ from . import logging
 from . import utils
 from . import save
 from .utils import seconds_to_hms
+from .utils import path_normalization
 from .download import download
 from .download import decompress
 from .download import download_and_decompress
paddlex/utils/utils.py

@@ -20,6 +20,7 @@ import numpy as np
 import six
 import yaml
 import math
+import platform
 from . import logging

@@ -49,18 +50,26 @@ def get_environ_info():
         info['num'] = fluid.core.get_cuda_device_count()
     return info


+def path_normalization(path):
+    win_sep = "\\"
+    other_sep = "/"
+    if platform.system() == "Windows":
+        path = win_sep.join(path.split(other_sep))
+    else:
+        path = other_sep.join(path.split(win_sep))
+    return path


 def parse_param_file(param_file, return_shape=True):
     from paddle.fluid.proto.framework_pb2 import VarType
     f = open(param_file, 'rb')
-    version = np.fromstring(f.read(4), dtype='int32')
-    lod_level = np.fromstring(f.read(8), dtype='int64')
+    version = np.frombuffer(f.read(4), dtype='int32')
+    lod_level = np.frombuffer(f.read(8), dtype='int64')
     for i in range(int(lod_level)):
-        _size = np.fromstring(f.read(8), dtype='int64')
+        _size = np.frombuffer(f.read(8), dtype='int64')
         _ = f.read(_size)
-    version = np.fromstring(f.read(4), dtype='int32')
+    version = np.frombuffer(f.read(4), dtype='int32')
     tensor_desc = VarType.TensorDesc()
-    tensor_desc_size = np.fromstring(f.read(4), dtype='int32')
+    tensor_desc_size = np.frombuffer(f.read(4), dtype='int32')
     tensor_desc.ParseFromString(f.read(int(tensor_desc_size)))
     tensor_shape = tuple(tensor_desc.dims)
     if return_shape:
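path_normalization simply rewrites whichever separator a path happens to contain into the one native to the running platform, which is what lets the dataset loaders above accept file lists written on Windows and consumed on Linux, or the other way around. A quick sketch of its behaviour, standalone and with the same logic as the helper added above; output is shown for a Linux host:

import platform


def path_normalization(path):
    # Same logic as the helper added in paddlex/utils/utils.py:
    # replace the "foreign" separator with the native one.
    win_sep = "\\"
    other_sep = "/"
    if platform.system() == "Windows":
        path = win_sep.join(path.split(other_sep))
    else:
        path = other_sep.join(path.split(win_sep))
    return path


print(path_normalization("JPEGImages\\0001.jpg"))   # on Linux/macOS: JPEGImages/0001.jpg
print(path_normalization("JPEGImages/0001.jpg"))    # unchanged on Linux/macOS

The np.fromstring to np.frombuffer switch in parse_param_file is a separate cleanup: np.fromstring is deprecated for reading binary data, and np.frombuffer is the drop-in replacement for byte input.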