Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
f24182e6
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f24182e6
编写于
4月 20, 2020
作者:
T
tangwei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bug
上级
42347db7
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
19 addition
and
9 deletion
+19
-9
fleetrec/examples/user_define_trainer.py
fleetrec/examples/user_define_trainer.py
+2
-4
fleetrec/run.py
fleetrec/run.py
+17
-5
未找到文件。
fleetrec/examples/user_define_trainer.py
浏览文件 @
f24182e6
...
@@ -29,11 +29,9 @@ class UserDefineTrainer(TranspileTrainer):
...
@@ -29,11 +29,9 @@ class UserDefineTrainer(TranspileTrainer):
self
.
regist_context_processor
(
'train_pass'
,
self
.
train
)
self
.
regist_context_processor
(
'train_pass'
,
self
.
train
)
def
init
(
self
,
context
):
def
init
(
self
,
context
):
self
.
model
.
net
()
self
.
model
.
train_net
()
self
.
model
.
metrics
()
self
.
model
.
avg_loss
()
optimizer
=
self
.
model
.
optimizer
()
optimizer
=
self
.
model
.
optimizer
()
optimizer
.
minimize
(
self
.
model
.
_cost
)
optimizer
.
minimize
(
(
self
.
model
.
get_cost_op
())
)
self
.
fetch_vars
=
[]
self
.
fetch_vars
=
[]
self
.
fetch_alias
=
[]
self
.
fetch_alias
=
[]
...
...
fleetrec/run.py
浏览文件 @
f24182e6
...
@@ -21,10 +21,15 @@ def set_runtime_envs(cluster_envs, engine_yaml):
...
@@ -21,10 +21,15 @@ def set_runtime_envs(cluster_envs, engine_yaml):
if
cluster_envs
is
None
:
if
cluster_envs
is
None
:
cluster_envs
=
{}
cluster_envs
=
{}
cluster_envs
.
update
(
cluster_envs
)
envs
.
set_runtime_envions
(
cluster_envs
)
cluster_envs
.
update
(
_envs
)
envs
.
set_runtime_envions
(
_envs
)
# envs.set_runtime_envions(cluster_envs)
print
(
envs
.
pretty_print_envs
(
cluster_envs
,
(
"Runtime Envs"
,
"Value"
)))
need_print
=
{}
for
k
,
v
in
os
.
environ
.
items
():
if
k
.
startswith
()
==
"trainer."
:
need_print
[
k
]
=
v
print
(
envs
.
pretty_print_envs
(
need_print
,
(
"Runtime Envs"
,
"Value"
)))
def
get_engine
(
engine
):
def
get_engine
(
engine
):
...
@@ -45,6 +50,7 @@ def single_engine(args):
...
@@ -45,6 +50,7 @@ def single_engine(args):
single_envs
=
{}
single_envs
=
{}
single_envs
[
"trainer.trainer"
]
=
"SingleTrainer"
single_envs
[
"trainer.trainer"
]
=
"SingleTrainer"
single_envs
[
"trainer.threads"
]
=
"2"
single_envs
[
"trainer.threads"
]
=
"2"
single_envs
[
"trainer.engine"
]
=
"single"
set_runtime_envs
(
single_envs
,
args
.
engine_extras
)
set_runtime_envs
(
single_envs
,
args
.
engine_extras
)
trainer
=
TrainerFactory
.
create
(
args
.
model
)
trainer
=
TrainerFactory
.
create
(
args
.
model
)
return
trainer
return
trainer
...
@@ -55,6 +61,7 @@ def cluster_engine(args):
...
@@ -55,6 +61,7 @@ def cluster_engine(args):
cluster_envs
=
{}
cluster_envs
=
{}
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"trainer.engine"
]
=
"cluster"
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
envs
.
set_runtime_envions
(
cluster_envs
)
envs
.
set_runtime_envions
(
cluster_envs
)
...
@@ -85,6 +92,7 @@ def local_cluster_engine(args):
...
@@ -85,6 +92,7 @@ def local_cluster_engine(args):
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrainer"
cluster_envs
[
"trainer.strategy"
]
=
"async"
cluster_envs
[
"trainer.strategy"
]
=
"async"
cluster_envs
[
"trainer.threads"
]
=
"2"
cluster_envs
[
"trainer.threads"
]
=
"2"
cluster_envs
[
"trainer.engine"
]
=
"local_cluster"
cluster_envs
[
"CPU_NUM"
]
=
"2"
cluster_envs
[
"CPU_NUM"
]
=
"2"
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
...
@@ -102,8 +110,12 @@ def local_mpi_engine(args):
...
@@ -102,8 +110,12 @@ def local_mpi_engine(args):
mpi
=
util
.
run_which
(
"mpirun"
)
mpi
=
util
.
run_which
(
"mpirun"
)
if
not
mpi
:
if
not
mpi
:
raise
RuntimeError
(
"can not find mpirun, please check environment"
)
raise
RuntimeError
(
"can not find mpirun, please check environment"
)
cluster_envs
=
{}
cluster_envs
[
"mpirun"
]
=
mpi
cluster_envs
[
"trainer.trainer"
]
=
"CtrCodingTrainer"
cluster_envs
[
"log_dir"
]
=
"logs"
cluster_envs
[
"trainer.engine"
]
=
"local_cluster"
cluster_envs
=
{
"mpirun"
:
mpi
,
"trainer.trainer"
:
"CtrCodingTrainer"
,
"log_dir"
:
"logs"
}
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
launch
=
LocalMPIEngine
(
cluster_envs
,
args
.
model
)
launch
=
LocalMPIEngine
(
cluster_envs
,
args
.
model
)
return
launch
return
launch
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录