Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
d229f76f
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
d229f76f
编写于
8月 14, 2020
作者:
L
liuyuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
updated for pre-commit
上级
c52c0d25
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
67 addition
and
6 deletion
+67
-6
core/trainers/framework/dataset.py
core/trainers/framework/dataset.py
+25
-4
core/utils/dataloader_instance.py
core/utils/dataloader_instance.py
+42
-2
未找到文件。
core/trainers/framework/dataset.py
浏览文件 @
d229f76f
...
...
@@ -89,6 +89,24 @@ class QueueDataset(DatasetBase):
else
:
return
self
.
_get_dataset
(
dataset_name
,
context
)
def check_filelist(self, file_list, train_data_path):
    """Recursively collect the paths of all data files under *train_data_path*.

    Hidden files and hidden directories (names starting with ``.``) are
    skipped. ``os.walk`` already descends into subdirectories, so no manual
    recursion is needed; each file is joined against the directory that
    actually contains it (``root``), not the top-level path — the previous
    version joined everything to ``train_data_path``, producing nonexistent
    paths for nested files, and crashed on an undefined ``dir_root`` name
    whenever a subdirectory existed.

    Args:
        file_list (list): list extended in place with the discovered paths.
        train_data_path (str): root directory to scan for data files.

    Returns:
        list: the (possibly empty) ``file_list``, also mutated in place.
    """
    for root, dirs, files in os.walk(train_data_path):
        # Prune hidden directories in place so os.walk does not descend
        # into them (slice assignment is required for the pruning to work).
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for file_name in files:
            if not file_name.startswith('.'):
                file_list.append(os.path.join(root, file_name))
    return file_list
def
_get_dataset
(
self
,
dataset_name
,
context
):
name
=
"dataset."
+
dataset_name
+
"."
reader_class
=
envs
.
get_global_env
(
name
+
"data_converter"
)
...
...
@@ -119,10 +137,13 @@ class QueueDataset(DatasetBase):
dataset
.
set_pipe_command
(
pipe_cmd
)
train_data_path
=
envs
.
get_global_env
(
name
+
"data_path"
)
file_list
=
[
os
.
path
.
join
(
train_data_path
,
x
)
for
x
in
os
.
listdir
(
train_data_path
)
]
# file_list = [
# os.path.join(train_data_path, x)
# for x in os.listdir(train_data_path)
# ]
file_list
=
[]
file_list
=
self
.
check_filelist
(
file_list
,
train_data_path
)
if
context
[
"engine"
]
==
EngineMode
.
LOCAL_CLUSTER
:
file_list
=
split_files
(
file_list
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
...
...
core/utils/dataloader_instance.py
浏览文件 @
d229f76f
...
...
@@ -38,7 +38,27 @@ def dataloader_by_name(readerclass,
assert
package_base
is
not
None
data_path
=
os
.
path
.
join
(
package_base
,
data_path
.
split
(
"::"
)[
1
])
files
=
[
str
(
data_path
)
+
"/%s"
%
x
for
x
in
os
.
listdir
(
data_path
)]
def check_filelist(file_list, train_data_path):
    """Recursively collect the paths of all data files under *train_data_path*.

    Hidden files and hidden directories (names starting with ``.``) are
    skipped. ``os.walk`` already walks the whole tree, so the manual
    recursion of the previous version is unnecessary; it also crashed on an
    undefined ``dir_root`` name whenever a subdirectory existed, and joined
    nested files to the top-level path instead of their containing
    directory, producing nonexistent paths.

    Args:
        file_list (list): list extended in place with the discovered paths.
        train_data_path (str): root directory to scan for data files.

    Returns:
        list: the (possibly empty) ``file_list``, also mutated in place.
    """
    for root, dirs, files in os.walk(train_data_path):
        # Prune hidden directories in place so os.walk skips their subtrees.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for file_name in files:
            if not file_name.startswith('.'):
                file_list.append(os.path.join(root, file_name))
    return file_list
#files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
files
=
[]
files
=
check_filelist
(
files
,
data_path
)
if
context
[
"engine"
]
==
EngineMode
.
LOCAL_CLUSTER
:
files
=
split_files
(
files
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
...
...
@@ -80,7 +100,27 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
assert
package_base
is
not
None
data_path
=
os
.
path
.
join
(
package_base
,
data_path
.
split
(
"::"
)[
1
])
files
=
[
str
(
data_path
)
+
"/%s"
%
x
for
x
in
os
.
listdir
(
data_path
)]
def check_filelist(file_list, train_data_path):
    """Recursively collect the paths of all data files under *train_data_path*.

    Hidden files and hidden directories (names starting with ``.``) are
    skipped. ``os.walk`` already traverses subdirectories, making the old
    manual recursion redundant; the old body also raised ``NameError`` on
    the undefined ``dir_root`` as soon as a subdirectory was present, and
    built nested-file paths relative to the top-level directory rather than
    the directory that actually contains each file.

    Args:
        file_list (list): list extended in place with the discovered paths.
        train_data_path (str): root directory to scan for data files.

    Returns:
        list: the (possibly empty) ``file_list``, also mutated in place.
    """
    for root, dirs, files in os.walk(train_data_path):
        # In-place pruning keeps os.walk out of hidden directory subtrees.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for file_name in files:
            if not file_name.startswith('.'):
                file_list.append(os.path.join(root, file_name))
    return file_list
#files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
files
=
[]
files
=
check_filelist
(
files
,
data_path
)
if
context
[
"engine"
]
==
EngineMode
.
LOCAL_CLUSTER
:
files
=
split_files
(
files
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录