Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
c762d6cb
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c762d6cb
编写于
4月 21, 2020
作者:
T
tangwei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
merge yaml two to one
上级
3b612ee4
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
16 additions
and
16 deletions
+16
-16
fleetrec/core/trainers/cluster_trainer.py
fleetrec/core/trainers/cluster_trainer.py
+1
-1
fleetrec/core/trainers/transpiler_trainer.py
fleetrec/core/trainers/transpiler_trainer.py
+1
-1
fleetrec/core/utils/envs.py
fleetrec/core/utils/envs.py
+1
-1
fleetrec/run.py
fleetrec/run.py
+13
-13
未找到文件。
fleetrec/core/trainers/cluster_trainer.py
浏览文件 @
c762d6cb
...
...
@@ -43,7 +43,7 @@ class ClusterTrainer(TranspileTrainer):
self
.
regist_context_processor
(
'terminal_pass'
,
self
.
terminal
)
def
build_strategy
(
self
):
mode
=
envs
.
get_runtime_environ
(
"trainer.strategy"
)
mode
=
envs
.
get_runtime_environ
(
"train.trainer.strategy"
)
assert
mode
in
[
"async"
,
"geo"
,
"sync"
,
"half_async"
]
strategy
=
None
...
...
fleetrec/core/trainers/transpiler_trainer.py
浏览文件 @
c762d6cb
...
...
@@ -39,7 +39,7 @@ class TranspileTrainer(Trainer):
namespace
=
"train.reader"
inputs
=
self
.
model
.
get_inputs
()
threads
=
int
(
envs
.
get_runtime_environ
(
"trainer.threads"
))
threads
=
int
(
envs
.
get_runtime_environ
(
"train.trainer.threads"
))
batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
namespace
)
reader_class
=
envs
.
get_global_env
(
"class"
,
None
,
namespace
)
abs_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
...
...
fleetrec/core/utils/envs.py
浏览文件 @
c762d6cb
...
...
@@ -50,7 +50,7 @@ def get_runtime_environ(key):
return
os
.
getenv
(
key
,
None
)
def
get_trainer
():
train_mode
=
get_runtime_environ
(
"trainer.trainer"
)
train_mode
=
get_runtime_environ
(
"train.trainer.trainer"
)
return
train_mode
...
...
fleetrec/run.py
浏览文件 @
c762d6cb
...
...
@@ -33,7 +33,7 @@ def set_runtime_envs(cluster_envs, engine_yaml):
need_print
=
{}
for
k
,
v
in
os
.
environ
.
items
():
if
k
.
startswith
(
"trainer."
):
if
k
.
startswith
(
"train.trainer."
):
need_print
[
k
]
=
v
print
(
envs
.
pretty_print_envs
(
need_print
,
(
"Runtime Envs"
,
"Value"
)))
...
...
@@ -55,9 +55,9 @@ def single_engine(args):
print
(
"use single engine to run model: {}"
.
format
(
args
.
model
))
single_envs
=
{}
single_envs
[
"trainer.trainer"
]
=
"SingleTrainer"
single_envs
[
"trainer.threads"
]
=
"2"
single_envs
[
"trainer.engine"
]
=
"single"
single_envs
[
"train.trainer.trainer"
]
=
"SingleTrainer"
single_envs
[
"train.trainer.threads"
]
=
"2"
single_envs
[
"train.trainer.engine"
]
=
"single"
set_runtime_envs
(
single_envs
,
args
.
model
)
trainer
=
TrainerFactory
.
create
(
args
.
model
)
return
trainer
...
...
@@ -67,8 +67,8 @@ def cluster_engine(args):
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
cluster_envs
=
{}
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"trainer.engine"
]
=
"cluster"
cluster_envs
[
"train.trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"train.trainer.engine"
]
=
"cluster"
set_runtime_envs
(
cluster_envs
,
args
.
model
)
trainer
=
TrainerFactory
.
create
(
args
.
model
)
...
...
@@ -79,7 +79,7 @@ def cluster_mpi_engine(args):
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
cluster_envs
=
{}
cluster_envs
[
"trainer.trainer"
]
=
"CtrCodingTrainer"
cluster_envs
[
"train.trainer.trainer"
]
=
"CtrCodingTrainer"
set_runtime_envs
(
cluster_envs
,
args
.
model
)
trainer
=
TrainerFactory
.
create
(
args
.
model
)
...
...
@@ -95,10 +95,10 @@ def local_cluster_engine(args):
cluster_envs
[
"worker_num"
]
=
1
cluster_envs
[
"start_port"
]
=
36001
cluster_envs
[
"log_dir"
]
=
"logs"
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"trainer.strategy"
]
=
"async"
cluster_envs
[
"trainer.threads"
]
=
"2"
cluster_envs
[
"trainer.engine"
]
=
"local_cluster"
cluster_envs
[
"train.trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"train.trainer.strategy"
]
=
"async"
cluster_envs
[
"train.trainer.threads"
]
=
"2"
cluster_envs
[
"train.trainer.engine"
]
=
"local_cluster"
cluster_envs
[
"CPU_NUM"
]
=
"2"
set_runtime_envs
(
cluster_envs
,
args
.
model
)
...
...
@@ -118,9 +118,9 @@ def local_mpi_engine(args):
raise
RuntimeError
(
"can not find mpirun, please check environment"
)
cluster_envs
=
{}
cluster_envs
[
"mpirun"
]
=
mpi
cluster_envs
[
"trainer.trainer"
]
=
"CtrCodingTrainer"
cluster_envs
[
"train.trainer.trainer"
]
=
"CtrCodingTrainer"
cluster_envs
[
"log_dir"
]
=
"logs"
cluster_envs
[
"trainer.engine"
]
=
"local_cluster"
cluster_envs
[
"train.trainer.engine"
]
=
"local_cluster"
set_runtime_envs
(
cluster_envs
,
args
.
model
)
launch
=
LocalMPIEngine
(
cluster_envs
,
args
.
model
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录