Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
e7986cb4
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e7986cb4
编写于
4月 20, 2020
作者:
T
tangwei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix readme
上级
3ccb3ea7
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
17 addition
and
30 deletion
+17
-30
fleetrec/core/factory.py
fleetrec/core/factory.py
+1
-1
fleetrec/run.py
fleetrec/run.py
+16
-29
未找到文件。
fleetrec/core/factory.py
浏览文件 @
e7986cb4
...
...
@@ -46,7 +46,7 @@ class TrainerFactory(object):
if
trainer_abs
is
None
:
if
not
os
.
path
.
exists
(
train_mode
)
or
os
.
path
.
isfile
(
train_mode
):
raise
ValueError
(
"trainer {} can not be recognized"
)
raise
ValueError
(
"trainer {} can not be recognized"
.
format
(
train_mode
)
)
trainer_abs
=
train_mode
train_mode
=
"UserDefineTrainer"
...
...
fleetrec/run.py
浏览文件 @
e7986cb4
...
...
@@ -16,6 +16,8 @@ def set_runtime_envs(cluster_envs, engine_yaml):
if
engine_yaml
is
not
None
:
with
open
(
engine_yaml
,
'r'
)
as
rb
:
_envs
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
_envs
=
{}
if
cluster_envs
is
None
:
cluster_envs
=
{}
...
...
@@ -24,15 +26,6 @@ def set_runtime_envs(cluster_envs, engine_yaml):
print
(
envs
.
pretty_print_envs
(
cluster_envs
,
(
"Runtime Envs"
,
"Value"
)))
def
engine_registry
():
engines
[
"TRAINSPILER"
][
"SINGLE"
]
=
single_engine
engines
[
"TRAINSPILER"
][
"LOCAL_CLUSTER"
]
=
local_cluster_engine
engines
[
"TRAINSPILER"
][
"CLUSTER"
]
=
cluster_engine
engines
[
"PSLIB"
][
"SINGLE"
]
=
local_mpi_engine
engines
[
"PSLIB"
][
"LOCAL_CLUSTER"
]
=
local_mpi_engine
engines
[
"PSLIB"
][
"CLUSTER"
]
=
cluster_mpi_engine
def
get_engine
(
engine
):
engine
=
engine
.
upper
()
if
version
.
is_transpiler
():
...
...
@@ -47,7 +40,7 @@ def get_engine(engine):
def
single_engine
(
args
):
print
(
"use single engine to run model: {}"
.
format
(
args
.
model
))
single_envs
=
{
"trainer.trainer"
:
"SingleTrain
ing
"
}
single_envs
=
{
"trainer.trainer"
:
"SingleTrain
er
"
}
set_runtime_envs
(
single_envs
,
args
.
engine_extras
)
...
...
@@ -58,7 +51,7 @@ def single_engine(args):
def
cluster_engine
(
args
):
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
cluster_envs
=
{
"trainer.trainer"
:
"ClusterTrain
ing
"
}
cluster_envs
=
{
"trainer.trainer"
:
"ClusterTrain
er
"
}
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
envs
.
set_runtime_envions
(
cluster_envs
)
...
...
@@ -69,7 +62,7 @@ def cluster_engine(args):
def
cluster_mpi_engine
(
args
):
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
cluster_envs
=
{
"trainer.trainer"
:
"Ctr
Training
"
}
cluster_envs
=
{
"trainer.trainer"
:
"Ctr
CodingTrainer
"
}
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
trainer
=
TrainerFactory
.
create
(
args
.
model
)
...
...
@@ -85,7 +78,7 @@ def local_cluster_engine(args):
cluster_envs
[
"worker_num"
]
=
1
cluster_envs
[
"start_port"
]
=
36001
cluster_envs
[
"log_dir"
]
=
"logs"
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrain
ing
"
cluster_envs
[
"trainer.trainer"
]
=
"ClusterTrain
er
"
cluster_envs
[
"trainer.strategy.mode"
]
=
"async"
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
...
...
@@ -104,28 +97,22 @@ def local_mpi_engine(args):
if
not
mpi
:
raise
RuntimeError
(
"can not find mpirun, please check environment"
)
cluster_envs
=
{
"mpirun"
:
mpi
,
"trainer.trainer"
:
"Ctr
Training
"
,
"log_dir"
:
"logs"
}
cluster_envs
=
{
"mpirun"
:
mpi
,
"trainer.trainer"
:
"Ctr
CodingTrainer
"
,
"log_dir"
:
"logs"
}
set_runtime_envs
(
cluster_envs
,
args
.
engine_extras
)
launch
=
LocalMPIEngine
(
cluster_envs
,
args
.
model
)
return
launch
#
# def yaml_engine(engine_yaml, model_yaml):
# with open(engine_yaml, 'r') as rb:
# _config = yaml.load(rb.read(), Loader=yaml.FullLoader)
# assert _config is not None
#
# envs.set_global_envs(_config)
#
# train_location = envs.get_global_env("engine.file")
# train_dirname = os.path.dirname(train_location)
# base_name = os.path.splitext(os.path.basename(train_location))[0]
# sys.path.append(train_dirname)
# trainer_class = envs.lazy_instance(base_name, "UserDefineTraining")
# trainer = trainer_class(model_yaml)
# return trainer
def
engine_registry
():
engines
[
"TRAINSPILER"
][
"SINGLE"
]
=
single_engine
engines
[
"TRAINSPILER"
][
"LOCAL_CLUSTER"
]
=
local_cluster_engine
engines
[
"TRAINSPILER"
][
"CLUSTER"
]
=
cluster_engine
engines
[
"PSLIB"
][
"SINGLE"
]
=
local_mpi_engine
engines
[
"PSLIB"
][
"LOCAL_CLUSTER"
]
=
local_mpi_engine
engines
[
"PSLIB"
][
"CLUSTER"
]
=
cluster_mpi_engine
engine_registry
()
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
description
=
'fleet-rec run'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录