BaiXuePrincess / PaddleRec (forked from PaddlePaddle / PaddleRec)
Unverified commit b1f708fc
Authored Sep 09, 2020 by wuzhihua; committed by GitHub on Sep 09, 2020
Merge pull request #204 from vslyu/fix_collective_files_partition
fix bugs for files partition running in collective mode
Parents: b619c193, 9a12e113
Showing 2 changed files with 23 additions and 71 deletions (+23 −71)
core/engine/local_cluster.py (+2 −1)
core/utils/dataloader_instance.py (+21 −70)
core/engine/local_cluster.py

@@ -119,7 +119,8 @@ class LocalClusterEngine(Engine):
                     "PADDLE_TRAINERS_NUM": str(worker_num),
                     "TRAINING_ROLE": "TRAINER",
                     "PADDLE_TRAINER_ID": str(i),
-                    "FLAGS_selected_gpus": str(selected_gpus[i])
+                    "FLAGS_selected_gpus": str(selected_gpus[i]),
+                    "PADDLEREC_GPU_NUMS": str(selected_gpus_num)
                 })
             os.system("mkdir -p {}".format(logs_dir))
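The PADDLEREC_GPU_NUMS variable added here travels from the launcher to each trainer process through the environment, where the dataloader (next file) reads it back. A minimal sketch of that round trip, assuming a three-GPU run; selected_gpus and the surrounding values are illustrative, not taken from this diff:

import os

# Launcher side (mirroring the env dict built in LocalClusterEngine above).
selected_gpus = ["0", "1", "2"]  # illustrative GPU list
os.environ.update({
    "TRAINING_ROLE": "TRAINER",
    "PADDLE_TRAINER_ID": "0",
    "FLAGS_selected_gpus": selected_gpus[0],
    "PADDLEREC_GPU_NUMS": str(len(selected_gpus)),  # the new variable
})

# Consumer side (as in dataloader_instance.py below): read it back.
gpu_nums = int(os.getenv("PADDLEREC_GPU_NUMS"))
print(gpu_nums)  # 3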
core/utils/dataloader_instance.py

@@ -14,6 +14,7 @@
 from __future__ import print_function
 import os
+import warnings
 from paddlerec.core.utils.envs import lazy_instance_by_fliename
 from paddlerec.core.utils.envs import get_global_env
 from paddlerec.core.utils.envs import get_runtime_environ
@@ -47,6 +48,16 @@ def dataloader_by_name(readerclass,
     files.sort()

+    # for local cluster: discard some files if files cannot be divided equally between GPUs
+    if (context["device"] == "GPU") and "PADDLEREC_GPU_NUMS" in os.environ:
+        selected_gpu_nums = int(os.getenv("PADDLEREC_GPU_NUMS"))
+        discard_file_nums = len(files) % selected_gpu_nums
+        if (discard_file_nums != 0):
+            warnings.warn(
+                "Because files cannot be divided equally between GPUs,discard these files:{}".
+                format(files[-discard_file_nums:]))
+            files = files[:len(files) - discard_file_nums]
+
     need_split_files = False
     if context["engine"] == EngineMode.LOCAL_CLUSTER:
         # for local cluster: split files for multi process
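A standalone check of the discard arithmetic added above, with made-up file names and the GPU count pinned to 2 rather than read from PADDLEREC_GPU_NUMS:

import warnings

files = ["part-0", "part-1", "part-2", "part-3", "part-4"]
selected_gpu_nums = 2  # stand-in for int(os.getenv("PADDLEREC_GPU_NUMS"))

discard_file_nums = len(files) % selected_gpu_nums  # 5 % 2 == 1
if discard_file_nums != 0:
    warnings.warn("discarding: {}".format(files[-discard_file_nums:]))
    files = files[:len(files) - discard_file_nums]

print(files)  # ['part-0', 'part-1', 'part-2', 'part-3'] -- now divisible by 2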
@@ -109,6 +120,16 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
     files.sort()

+    # for local cluster: discard some files if files cannot be divided equally between GPUs
+    if (context["device"] == "GPU") and "PADDLEREC_GPU_NUMS" in os.environ:
+        selected_gpu_nums = int(os.getenv("PADDLEREC_GPU_NUMS"))
+        discard_file_nums = len(files) % selected_gpu_nums
+        if (discard_file_nums != 0):
+            warnings.warn(
+                "Because files cannot be divided equally between GPUs,discard these files:{}".
+                format(files[-discard_file_nums:]))
+            files = files[:len(files) - discard_file_nums]
+
     need_split_files = False
     if context["engine"] == EngineMode.LOCAL_CLUSTER:
         # for local cluster: split files for multi process
@@ -153,73 +174,3 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
     if hasattr(reader, 'generate_batch_from_trainfiles'):
         return gen_batch_reader()
     return gen_reader
-
-
-def slotdataloader(readerclass, train, yaml_file, context):
-    if train == "TRAIN":
-        reader_name = "SlotReader"
-        namespace = "train.reader"
-        data_path = get_global_env("train_data_path", None, namespace)
-    else:
-        reader_name = "SlotReader"
-        namespace = "evaluate.reader"
-        data_path = get_global_env("test_data_path", None, namespace)
-
-    if data_path.startswith("paddlerec::"):
-        package_base = get_runtime_environ("PACKAGE_BASE")
-        assert package_base is not None
-        data_path = os.path.join(package_base, data_path.split("::")[1])
-
-    hidden_file_list, files = check_filelist(
-        hidden_file_list=[], data_file_list=[], train_data_path=data_path)
-    if (hidden_file_list is not None):
-        print(
-            "Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}".
-            format(hidden_file_list))
-
-    files.sort()
-
-    need_split_files = False
-    if context["engine"] == EngineMode.LOCAL_CLUSTER:
-        # for local cluster: split files for multi process
-        need_split_files = True
-    elif context["engine"] == EngineMode.CLUSTER and context[
-            "cluster_type"] == "K8S":
-        # for k8s mount mode, split files for every node
-        need_split_files = True
-
-    if need_split_files:
-        files = split_files(files, context["fleet"].worker_index(),
-                            context["fleet"].worker_num())
-    context["file_list"] = files
-
-    sparse = get_global_env("sparse_slots", "#", namespace)
-    if sparse == "":
-        sparse = "#"
-    dense = get_global_env("dense_slots", "#", namespace)
-    if dense == "":
-        dense = "#"
-    padding = get_global_env("padding", 0, namespace)
-    reader = SlotReader(yaml_file)
-    reader.init(sparse, dense, int(padding))
-
-    def gen_reader():
-        for file in files:
-            with open(file, 'r') as f:
-                for line in f:
-                    line = line.rstrip('\n')
-                    iter = reader.generate_sample(line)
-                    for parsed_line in iter():
-                        if parsed_line is None:
-                            continue
-                        else:
-                            values = []
-                            for pased in parsed_line:
-                                values.append(pased[1])
-                            yield values
-
-    def gen_batch_reader():
-        return reader.generate_batch_from_trainfiles(files)
-
-    if hasattr(reader, 'generate_batch_from_trainfiles'):
-        return gen_batch_reader()
-    return gen_reader
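Both the deleted slotdataloader and the surviving loaders shard the file list with split_files(files, worker_index, worker_num). Its implementation is not part of this diff, so the following is only a plausible sketch of such a splitter, under the assumption of round-robin sharding:

def split_files(files, worker_index, worker_num):
    # Assumed behavior: each worker takes every worker_num-th file,
    # starting from its own index.
    return files[worker_index::worker_num]

files = ["part-0", "part-1", "part-2", "part-3", "part-4"]
print(split_files(files, 0, 2))  # ['part-0', 'part-2', 'part-4']
print(split_files(files, 1, 2))  # ['part-1', 'part-3']

With five files and two workers the shards come out uneven (3 vs 2), which is exactly the imbalance the new GPU-mode discard step removes before sharding.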