Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
9735f250
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9735f250
编写于
6月 05, 2018
作者:
T
tangwei12
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimized
上级
bfdcf187
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
20 addition
and
32 deletion
+20
-32
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+17
-27
python/paddle/fluid/trainer.py
python/paddle/fluid/trainer.py
+3
-5
未找到文件。
python/paddle/fluid/io.py
浏览文件 @
9735f250
...
...
@@ -492,7 +492,7 @@ def save_checkpoint(executor,
if
not
os
.
path
.
isdir
(
checkpoint_dir
):
os
.
makedirs
(
checkpoint_dir
)
serial
=
_get_latest_checkpoint_dir
(
checkpoint_dir
)
+
1
serial
=
get_latest_checkpoint_serial
(
checkpoint_dir
)
+
1
cur_dir
=
_get_serial_dir
(
checkpoint_dir
,
serial
)
save_trainer_args
(
cur_dir
,
trainer_id
,
trainer_args
)
...
...
@@ -503,18 +503,6 @@ def save_checkpoint(executor,
_lru_delete
(
checkpoint_dir
,
max_num_checkpoints
)
def
get_latest_checkpoint_serial
(
checkpoint_dir
):
"""
If the directory have checkpoint files, it will return latest checkpoint directory serial number
:param checkpoint_dir
"""
serial
=
_get_latest_checkpoint_dir
(
checkpoint_dir
)
if
serial
<
0
:
return
None
return
serial
def
load_checkpoint
(
executor
,
checkpoint_dir
,
serial
,
main_program
):
"""
Load checkpoint from a directory by executor,
...
...
@@ -527,17 +515,16 @@ def load_checkpoint(executor, checkpoint_dir, serial, main_program):
"""
if
checkpoint_dir
is
None
:
raise
ValueError
(
"The values of 'checkpoint_dir' or 'serial' should not be None"
)
raise
ValueError
(
"The values of 'checkpoint_dir' should not be None"
)
if
serial
is
None
or
serial
<
0
:
raise
ValueError
(
"The values of 'serial' should not be None or <0 "
)
if
main_program
is
None
:
raise
ValueError
(
"The values of 'main_program'should not be None"
)
raise
ValueError
(
'main_program should not be None.'
)
cur_dir
=
_get_serial_dir
(
checkpoint_dir
,
serial
)
load_persist_vars_without_grad
(
executor
,
cur_dir
,
main_program
)
load_persist_vars_without_grad
(
executor
,
cur_dir
,
main_program
,
True
)
def
clean_checkpoint
(
checkpoint_dir
,
delete_dir
=
False
):
...
...
@@ -557,18 +544,21 @@ def clean_checkpoint(checkpoint_dir, delete_dir=False):
os
.
rmdir
(
checkpoint_dir
)
def
load_persist_vars_without_grad
(
executor
,
dirname
,
program
,
nest
=
True
):
def
load_persist_vars_without_grad
(
executor
,
dirname
,
program
,
has_model_dir
=
False
):
"""
load_persist_vars_without_grad will load variables from a directory by an executor,
the variable named end with "@GRAD" will not be loaded.
:param executor
:param dirname
:param program
:param
nest
:param executor
executor for load the value
:param dirname
the checkpoint directory
:param program
will load all variables in program
:param
has_model_dir if has_model_dir is True, will load variables from sub directory named __model__
"""
if
nest
:
if
has_model_dir
:
dirname
=
_get_model_dir
(
dirname
)
load_vars
(
...
...
@@ -584,9 +574,9 @@ def save_persist_vars_without_grad(executor, dirname, program):
save_persist_vars_without_grad will save variables to a directory by an executor,
the variable named end with "@GRAD" will not be saved.
:param executor
:param dirname
:param program
:param executor
executor for load the value
:param dirname
the checkpoint directory
:param program
will load all variables in program
"""
cur_dir
=
_get_model_dir
(
dirname
)
save_vars
(
...
...
@@ -722,7 +712,7 @@ def _write_success(dirname):
f
.
write
(
now
)
def
_get_latest_checkpoint_dir
(
checkpoint_dir
):
def
get_latest_checkpoint_serial
(
checkpoint_dir
):
"""
get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory
...
...
python/paddle/fluid/trainer.py
浏览文件 @
9735f250
...
...
@@ -146,8 +146,9 @@ class Trainer(object):
"The checkpoint_config shoule be an instance of CheckpointConfig"
)
else
:
se
lf
.
checkpoint
.
load_se
rial
=
io
.
get_latest_checkpoint_serial
(
serial
=
io
.
get_latest_checkpoint_serial
(
self
.
checkpoint
.
checkpoint_dir
)
self
.
checkpoint
.
load_serial
=
serial
if
serial
>=
0
else
None
self
.
scope
=
core
.
Scope
()
...
...
@@ -194,10 +195,7 @@ class Trainer(object):
if
param_path
and
os
.
path
.
isdir
(
param_path
):
# load params from param_path into scope
io
.
load_persist_vars_without_grad
(
exe
,
dirname
=
param_path
,
program
=
self
.
startup_program
,
nest
=
False
)
exe
,
dirname
=
param_path
,
program
=
self
.
startup_program
)
def
_transpile_nccl2_dist
(
self
):
# PADDLE_TRAINER_IPS
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录