Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
229964e4
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
229964e4
编写于
9月 12, 2019
作者:
X
xiexionghang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add force-dump in startup
上级
19b00fb2
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
8 addition
and
6 deletion
+8
-6
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
...luid/train/custom_trainer/feed/process/learner_process.cc
+5
-4
paddle/fluid/train/custom_trainer/feed/process/learner_process.h
...fluid/train/custom_trainer/feed/process/learner_process.h
+3
-2
未找到文件。
paddle/fluid/train/custom_trainer/feed/process/learner_process.cc
浏览文件 @
229964e4
...
@@ -16,6 +16,7 @@ namespace feed {
...
@@ -16,6 +16,7 @@ namespace feed {
int
LearnerProcess
::
initialize
(
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
int
LearnerProcess
::
initialize
(
std
::
shared_ptr
<
TrainerContext
>
context_ptr
)
{
int
ret
=
Process
::
initialize
(
context_ptr
);
int
ret
=
Process
::
initialize
(
context_ptr
);
auto
&
config
=
_context_ptr
->
trainer_config
;
auto
&
config
=
_context_ptr
->
trainer_config
;
_startup_dump_inference_base
=
config
[
"startup_dump_inference_base"
].
as
<
bool
>
(
false
);
if
(
config
[
"executor"
])
{
if
(
config
[
"executor"
])
{
_executors
.
resize
(
config
[
"executor"
].
size
());
_executors
.
resize
(
config
[
"executor"
].
size
());
for
(
size_t
i
=
0
;
i
<
_executors
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
_executors
.
size
();
++
i
)
{
...
@@ -26,7 +27,7 @@ int LearnerProcess::initialize(std::shared_ptr<TrainerContext> context_ptr) {
...
@@ -26,7 +27,7 @@ int LearnerProcess::initialize(std::shared_ptr<TrainerContext> context_ptr) {
return
0
;
return
0
;
}
}
int
LearnerProcess
::
wait_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
way
)
{
int
LearnerProcess
::
wait_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
way
,
bool
is_force_dump
)
{
auto
fs
=
_context_ptr
->
file_system
;
auto
fs
=
_context_ptr
->
file_system
;
auto
*
ps_client
=
_context_ptr
->
pslib
->
ps_client
();
auto
*
ps_client
=
_context_ptr
->
pslib
->
ps_client
();
auto
*
environment
=
_context_ptr
->
environment
.
get
();
auto
*
environment
=
_context_ptr
->
environment
.
get
();
...
@@ -34,7 +35,7 @@ int LearnerProcess::wait_save_model(uint64_t epoch_id, ModelSaveWay way) {
...
@@ -34,7 +35,7 @@ int LearnerProcess::wait_save_model(uint64_t epoch_id, ModelSaveWay way) {
if
(
!
environment
->
is_master_node
(
EnvironmentRole
::
WORKER
))
{
if
(
!
environment
->
is_master_node
(
EnvironmentRole
::
WORKER
))
{
return
0
;
return
0
;
}
}
if
(
!
epoch_accessor
->
need_save_model
(
epoch_id
,
way
))
{
if
(
!
is_force_dump
&&
!
epoch_accessor
->
need_save_model
(
epoch_id
,
way
))
{
return
0
;
return
0
;
}
}
paddle
::
platform
::
Timer
timer
;
paddle
::
platform
::
Timer
timer
;
...
@@ -112,8 +113,8 @@ int LearnerProcess::run() {
...
@@ -112,8 +113,8 @@ int LearnerProcess::run() {
CHECK
(
load_model
(
epoch_id
)
==
0
);
CHECK
(
load_model
(
epoch_id
)
==
0
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
//判断是否先dump出base
//判断是否先dump出base
TODO
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveInferenceBase
);
wait_save_model
(
epoch_id
,
ModelSaveWay
::
ModelSaveInferenceBase
,
_startup_dump_inference_base
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
environment
->
barrier
(
EnvironmentRole
::
WORKER
);
while
(
true
)
{
while
(
true
)
{
...
...
paddle/fluid/train/custom_trainer/feed/process/learner_process.h
浏览文件 @
229964e4
...
@@ -20,10 +20,11 @@ public:
...
@@ -20,10 +20,11 @@ public:
protected:
protected:
// 加载所有模型
// 加载所有模型
virtual
int
load_model
(
uint64_t
epoch_id
);
virtual
int
load_model
(
uint64_t
epoch_id
);
// 同步保存所有模型
// 同步保存所有模型
, is_force_dump:不判断dump条件,强制dump出模型
virtual
int
wait_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
way
);
virtual
int
wait_save_model
(
uint64_t
epoch_id
,
ModelSaveWay
way
,
bool
is_force_dump
=
false
);
private:
private:
bool
_startup_dump_inference_base
;
//启动立即dump base
std
::
vector
<
std
::
shared_ptr
<
MultiThreadExecutor
>>
_executors
;
std
::
vector
<
std
::
shared_ptr
<
MultiThreadExecutor
>>
_executors
;
};
};
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录