Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
3b9c100b
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
3b9c100b
编写于
9月 28, 2020
作者:
W
wuzhihua
提交者:
GitHub
9月 28, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #236 from vslyu/fix_phase
fix phase & format save_step output information
上级
7cfe354b
dad59669
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
21 addition
and
18 deletion
+21
-18
core/trainer.py
core/trainer.py
+3
-4
core/trainers/framework/network.py
core/trainers/framework/network.py
+13
-11
core/trainers/framework/runner.py
core/trainers/framework/runner.py
+5
-3
未找到文件。
core/trainer.py
浏览文件 @
3b9c100b
...
...
@@ -76,9 +76,6 @@ class Trainer(object):
_config
=
envs
.
load_yaml
(
config
)
self
.
_context
[
"env"
]
=
_config
self
.
_context
[
"dataset"
]
=
_config
.
get
(
"dataset"
)
phases
=
[]
if
phase_names
is
None
:
phases
=
_config
.
get
(
"phase"
)
...
...
@@ -86,8 +83,10 @@ class Trainer(object):
for
phase
in
_config
.
get
(
"phase"
):
if
phase
[
"name"
]
in
phase_names
:
phases
.
append
(
phase
)
self
.
_context
[
"phases"
]
=
phases
_config
[
"phase"
]
=
phases
self
.
_context
[
"env"
]
=
_config
self
.
_context
[
"dataset"
]
=
_config
.
get
(
"dataset"
)
print
(
"PaddleRec: Runner {} Begin"
.
format
(
self
.
_runner_name
))
self
.
which_engine
()
self
.
which_device
()
...
...
core/trainers/framework/network.py
浏览文件 @
3b9c100b
...
...
@@ -238,8 +238,8 @@ class PSNetwork(NetworkBase):
else
:
context
[
"fleet"
].
init_worker
()
context
[
"dataset"
]
=
{}
for
dataset
in
context
[
"env"
][
"dataset
"
]:
type
=
envs
.
get_global_env
(
"dataset."
+
dataset
[
"
name"
]
+
for
phase
in
context
[
"env"
][
"phase
"
]:
type
=
envs
.
get_global_env
(
"dataset."
+
phase
[
"dataset_
name"
]
+
".type"
)
if
type
==
"DataLoader"
:
data_loader
=
DataLoader
(
context
)
...
...
@@ -247,9 +247,9 @@ class PSNetwork(NetworkBase):
model
.
_data_loader
)
elif
type
==
"QueueDataset"
:
dataset_class
=
QueueDataset
(
context
)
context
[
"dataset"
][
dataset
[
"name"
]]
=
dataset_class
.
create_dataset
(
dataset
[
"
name"
],
context
)
context
[
"dataset"
][
phase
[
"
dataset_
name"
]]
=
dataset_class
.
create_dataset
(
phase
[
"dataset_
name"
],
context
)
context
[
"status"
]
=
"startup_pass"
def
_build_strategy
(
self
,
context
):
...
...
@@ -336,7 +336,7 @@ class PslibNetwork(NetworkBase):
self
.
_server
(
context
)
else
:
context
[
"dataset"
]
=
{}
for
dataset
in
context
[
"env"
][
"dataset
"
]:
for
phase
in
context
[
"env"
][
"phase
"
]:
type
=
envs
.
get_global_env
(
"dataset."
+
dataset
[
"name"
]
+
".type"
)
if
type
==
"DataLoader"
:
...
...
@@ -363,6 +363,7 @@ class CollectiveNetwork(NetworkBase):
def
build_network
(
self
,
context
):
context
[
"model"
]
=
{}
if
len
(
context
[
"env"
][
"phase"
])
>
1
:
print
(
"CollectiveNetwork phase:{}"
.
format
(
context
[
"env"
][
"phase"
]))
warnings
.
warn
(
"Cluster Train Only Support One Phase."
,
category
=
UserWarning
,
...
...
@@ -407,16 +408,17 @@ class CollectiveNetwork(NetworkBase):
context
[
"model"
][
model_dict
[
"name"
]][
"compiled_program"
]
=
None
context
[
"dataset"
]
=
{}
for
dataset
in
context
[
"env"
][
"dataset"
]:
type
=
envs
.
get_global_env
(
"dataset."
+
dataset
[
"name"
]
+
".type"
)
for
phase
in
context
[
"env"
][
"phase"
]:
type
=
envs
.
get_global_env
(
"dataset."
+
phase
[
"dataset_name"
]
+
".type"
)
if
type
==
"QueueDataset"
:
raise
ValueError
(
"Collective don't support QueueDataset training, please use DataLoader."
)
dataset_class
=
QueueDataset
(
context
)
context
[
"dataset"
][
dataset
[
"
name"
]]
=
dataset_class
.
create_dataset
(
dataset
[
"name"
],
context
)
context
[
"dataset"
][
phase
[
"
dataset_name"
]]
=
dataset_class
.
create_dataset
(
phase
[
"dataset_name"
],
context
)
context
[
"status"
]
=
"startup_pass"
def
_build_strategy
(
self
,
context
):
...
...
core/trainers/framework/runner.py
浏览文件 @
3b9c100b
...
...
@@ -436,9 +436,11 @@ class RunnerBase(object):
dirname
=
envs
.
get_global_env
(
name
+
"save_step_path"
,
None
)
if
dirname
is
None
or
dirname
==
""
:
return
dirname
=
os
.
path
.
join
(
dirname
,
str
(
batch_id
))
logging
.
info
(
"
\t
save batch_id:%d model into:
\"
%s
\"
"
%
(
batch_id
,
dirname
))
dirname
=
os
.
path
.
join
(
dirname
,
"epoch_"
+
str
(
context
[
"current_epoch"
])
+
"_batch_"
+
str
(
batch_id
))
logging
.
info
(
"
\t
save epoch_id:%d, batch_id:%d model into:
\"
%s
\"
"
%
(
context
[
"current_epoch"
],
batch_id
,
dirname
))
if
is_fleet
:
if
context
[
"fleet"
].
worker_index
()
==
0
:
context
[
"fleet"
].
save_persistables
(
context
[
"exe"
],
dirname
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录