Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Oneflow-Inc
OneFlow-Benchmark
提交
afec85f2
O
OneFlow-Benchmark
项目概览
Oneflow-Inc
/
OneFlow-Benchmark
上一次同步 接近 3 年
通知
1
Star
92
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
OneFlow-Benchmark
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
afec85f2
编写于
9月 28, 2021
作者:
O
ouyangyu
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add model save parameters
上级
f51c43e2
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
39 additions
and
9 deletions
+39
-9
Classification/cnns/config.py
Classification/cnns/config.py
+18
-0
Classification/cnns/of_cnn_train_val.py
Classification/cnns/of_cnn_train_val.py
+8
-3
Classification/cnns/util.py
Classification/cnns/util.py
+3
-3
LanguageModeling/BERT/config.py
LanguageModeling/BERT/config.py
+7
-0
LanguageModeling/BERT/run_pretraining.py
LanguageModeling/BERT/run_pretraining.py
+1
-1
LanguageModeling/BERT/util.py
LanguageModeling/BERT/util.py
+2
-2
未找到文件。
Classification/cnns/config.py
浏览文件 @
afec85f2
...
...
@@ -92,6 +92,24 @@ def get_parser(parser=None):
parser
.
add_argument
(
"--model_load_dir"
,
type
=
str
,
default
=
None
,
help
=
"model load directory if need"
)
parser
.
add_argument
(
"--save_epoch_interval"
,
type
=
int
,
default
=
10
,
help
=
"Number of iterations between checkpoint saves."
,
)
parser
.
add_argument
(
"--save_last"
,
action
=
"store_true"
,
default
=
False
,
help
=
"save model snapshot for last iteration"
,
)
parser
.
add_argument
(
"--save_init"
,
action
=
"store_true"
,
default
=
False
,
help
=
"save model snapshot for inited"
,
)
parser
.
add_argument
(
"--batch_size_per_device"
,
type
=
int
,
default
=
64
)
parser
.
add_argument
(
"--val_batch_size_per_device"
,
type
=
int
,
default
=
8
)
...
...
Classification/cnns/of_cnn_train_val.py
浏览文件 @
afec85f2
...
...
@@ -67,6 +67,7 @@ if args.nccl_fusion_max_ops:
if
args
.
num_nodes
>
1
and
args
.
use_rdma
:
flow
.
config
.
use_rdma
(
True
)
def
label_smoothing
(
labels
,
classes
,
eta
,
dtype
):
assert
classes
>
0
assert
eta
>=
0.0
and
eta
<
1.0
...
...
@@ -132,11 +133,11 @@ def main():
InitNodes
(
args
)
flow
.
env
.
log_dir
(
args
.
log_dir
)
snapshot
=
Snapshot
(
args
.
model_save_dir
,
args
.
model_load_dir
)
snapshot
=
Snapshot
(
args
.
model_save_dir
,
args
.
model_load_dir
,
args
.
save_init
)
print
(
" {} iter per epoch..."
.
format
(
epoch_size
))
for
epoch
in
range
(
args
.
num_epochs
):
for
epoch
in
range
(
1
,
args
.
num_epochs
):
metric
=
Metric
(
desc
=
"train"
,
calculate_batches
=
args
.
loss_print_every_n_iter
,
...
...
@@ -154,7 +155,11 @@ def main():
)
for
i
in
range
(
num_val_steps
):
InferenceNet
().
async_get
(
metric
.
metric_cb
(
epoch
,
i
))
snapshot
.
save
(
"epoch_{}"
.
format
(
epoch
))
if
epoch
%
args
.
save_epoch_interval
==
0
:
snapshot
.
save
(
"epoch_{}"
.
format
(
epoch
))
if
args
.
save_last
:
snapshot
.
save
(
"epoch_{}"
.
format
(
"last"
))
if
__name__
==
"__main__"
:
...
...
Classification/cnns/util.py
浏览文件 @
afec85f2
...
...
@@ -36,14 +36,14 @@ def InitNodes(args):
class
Snapshot
(
object
):
def
__init__
(
self
,
model_save_dir
,
model_load_dir
):
def
__init__
(
self
,
model_save_dir
,
model_load_dir
,
save_init
=
False
):
self
.
_model_save_dir
=
model_save_dir
if
model_load_dir
:
assert
os
.
path
.
isdir
(
model_load_dir
)
print
(
"Restoring model from {}."
.
format
(
model_load_dir
))
flow
.
load_variables
(
flow
.
checkpoint
.
get
(
model_load_dir
))
else
:
#
flow.checkpoint.save("initial_model")
elif
save_init
:
flow
.
checkpoint
.
save
(
"initial_model"
)
print
(
"Init model on demand."
)
def
save
(
self
,
name
):
...
...
LanguageModeling/BERT/config.py
浏览文件 @
afec85f2
...
...
@@ -121,6 +121,13 @@ def get_parser(parser=None):
required
=
False
,
help
=
"model save directory"
,
)
parser
.
add_argument
(
"--model_save_init"
,
action
=
"store_true"
,
default
=
False
,
help
=
"save model snapshot for inited"
,
)
parser
.
add_argument
(
"--save_last_snapshot"
,
type
=
str2bool
,
...
...
LanguageModeling/BERT/run_pretraining.py
浏览文件 @
afec85f2
...
...
@@ -124,7 +124,7 @@ def main():
InitNodes
(
args
)
snapshot
=
Snapshot
(
args
.
model_save_dir
,
args
.
model_load_dir
)
snapshot
=
Snapshot
(
args
.
model_save_dir
,
args
.
model_load_dir
,
args
.
model_save_init
)
print
(
"num_accumulation_steps:"
,
args
.
num_accumulation_steps
)
metric
=
Metric
(
...
...
LanguageModeling/BERT/util.py
浏览文件 @
afec85f2
...
...
@@ -37,13 +37,13 @@ def InitNodes(args):
class
Snapshot
(
object
):
def
__init__
(
self
,
model_save_dir
,
model_load_dir
):
def
__init__
(
self
,
model_save_dir
,
model_load_dir
,
model_save_init
=
False
):
self
.
_model_save_dir
=
model_save_dir
if
model_load_dir
:
assert
os
.
path
.
isdir
(
model_load_dir
)
print
(
"Restoring model from {}."
.
format
(
model_load_dir
))
flow
.
load_variables
(
flow
.
checkpoint
.
get
(
model_load_dir
))
else
:
elif
model_save_init
:
flow
.
checkpoint
.
save
(
"initial_model"
)
print
(
"Init model on demand."
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录