Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
2aa4e664
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2aa4e664
编写于
9月 07, 2020
作者:
T
tangwei12
提交者:
GitHub
9月 07, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' into tdm_build_tree
上级
f81ecb77
c26b0e75
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
53 additions
and
5 deletions
+53
-5
core/model.py
core/model.py
+7
-0
core/trainers/framework/dataset.py
core/trainers/framework/dataset.py
+2
-0
core/trainers/framework/runner.py
core/trainers/framework/runner.py
+29
-2
core/utils/dataloader_instance.py
core/utils/dataloader_instance.py
+3
-3
core/utils/util.py
core/utils/util.py
+12
-0
未找到文件。
core/model.py
浏览文件 @
2aa4e664
...
...
@@ -177,6 +177,13 @@ class ModelBase(object):
opt_name
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.class"
)
opt_lr
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.learning_rate"
)
if
not
isinstance
(
opt_lr
,
(
float
,
Variable
)):
try
:
opt_lr
=
float
(
opt_lr
)
except
ValueError
:
raise
ValueError
(
"In your config yaml, 'learning_rate': %s must be written as a floating piont number,such as 0.001 or 1e-3"
%
opt_lr
)
opt_strategy
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.strategy"
)
...
...
core/trainers/framework/dataset.py
浏览文件 @
2aa4e664
...
...
@@ -143,6 +143,8 @@ class QueueDataset(DatasetBase):
if
need_split_files
:
file_list
=
split_files
(
file_list
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
context
[
"file_list"
]
=
file_list
print
(
"File_list: {}"
.
format
(
file_list
))
dataset
.
set_filelist
(
file_list
)
...
...
core/trainers/framework/runner.py
浏览文件 @
2aa4e664
...
...
@@ -18,10 +18,12 @@ import os
import
time
import
warnings
import
numpy
as
np
import
random
import
logging
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils.util
import
shuffle_files
from
paddlerec.core.metric
import
Metric
logging
.
basicConfig
(
...
...
@@ -92,7 +94,6 @@ class RunnerBase(object):
reader_name
=
model_dict
[
"dataset_name"
]
model_name
=
model_dict
[
"name"
]
model_class
=
context
[
"model"
][
model_dict
[
"name"
]][
"model"
]
fetch_vars
=
[]
fetch_alias
=
[]
fetch_period
=
int
(
...
...
@@ -395,7 +396,12 @@ class SingleRunner(RunnerBase):
for
model_dict
in
context
[
"phases"
]:
model_class
=
context
[
"model"
][
model_dict
[
"name"
]][
"model"
]
metrics
=
model_class
.
_metrics
if
"shuffle_filelist"
in
model_dict
:
need_shuffle_files
=
model_dict
.
get
(
"shuffle_filelist"
,
None
)
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
begin_time
=
time
.
time
()
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
...
...
@@ -439,6 +445,11 @@ class PSRunner(RunnerBase):
model_class
=
context
[
"model"
][
model_dict
[
"name"
]][
"model"
]
metrics
=
model_class
.
_metrics
for
epoch
in
range
(
epochs
):
if
"shuffle_filelist"
in
model_dict
:
need_shuffle_files
=
model_dict
.
get
(
"shuffle_filelist"
,
None
)
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
begin_time
=
time
.
time
()
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
...
...
@@ -484,6 +495,11 @@ class CollectiveRunner(RunnerBase):
".epochs"
))
model_dict
=
context
[
"env"
][
"phase"
][
0
]
for
epoch
in
range
(
epochs
):
if
"shuffle_filelist"
in
model_dict
:
need_shuffle_files
=
model_dict
.
get
(
"shuffle_filelist"
,
None
)
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
begin_time
=
time
.
time
()
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
...
...
@@ -512,6 +528,11 @@ class PslibRunner(RunnerBase):
envs
.
get_global_env
(
"runner."
+
context
[
"runner_name"
]
+
".epochs"
))
for
epoch
in
range
(
epochs
):
if
"shuffle_filelist"
in
model_dict
:
need_shuffle_files
=
model_dict
.
get
(
"shuffle_filelist"
,
None
)
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
begin_time
=
time
.
time
()
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
...
...
@@ -574,6 +595,12 @@ class SingleInferRunner(RunnerBase):
metrics
=
model_class
.
_infer_results
self
.
_load
(
context
,
model_dict
,
self
.
epoch_model_path_list
[
index
])
if
"shuffle_filelist"
in
model_dict
:
need_shuffle_files
=
model_dict
.
get
(
"shuffle_filelist"
,
None
)
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
begin_time
=
time
.
time
()
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
...
...
core/utils/dataloader_instance.py
浏览文件 @
2aa4e664
...
...
@@ -59,7 +59,7 @@ def dataloader_by_name(readerclass,
if
need_split_files
:
files
=
split_files
(
files
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
context
[
"file_list"
]
=
files
reader
=
reader_class
(
yaml_file
)
reader
.
init
()
...
...
@@ -121,7 +121,7 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
if
need_split_files
:
files
=
split_files
(
files
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
context
[
"file_list"
]
=
files
sparse
=
get_global_env
(
name
+
"sparse_slots"
,
"#"
)
if
sparse
==
""
:
sparse
=
"#"
...
...
@@ -191,7 +191,7 @@ def slotdataloader(readerclass, train, yaml_file, context):
if
need_split_files
:
files
=
split_files
(
files
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
context
[
"file_list"
]
=
files
sparse
=
get_global_env
(
"sparse_slots"
,
"#"
,
namespace
)
if
sparse
==
""
:
sparse
=
"#"
...
...
core/utils/util.py
浏览文件 @
2aa4e664
...
...
@@ -16,6 +16,8 @@ import datetime
import
os
import
sys
import
time
import
warnings
import
random
import
numpy
as
np
from
paddle
import
fluid
...
...
@@ -223,6 +225,16 @@ def check_filelist(hidden_file_list, data_file_list, train_data_path):
return
hidden_file_list
,
data_file_list
def shuffle_files(need_shuffle_files, filelist):
    """Optionally shuffle ``filelist`` in place and return it.

    Args:
        need_shuffle_files: Value of ``shuffle_filelist`` from the config
            yaml. Must be an actual ``bool``; any other type is rejected.
        filelist: Mutable sequence of file names. It is shuffled in place
            with ``random.shuffle`` when ``need_shuffle_files`` is True.

    Returns:
        The same ``filelist`` object that was passed in (shuffled or not).

    Raises:
        ValueError: If ``need_shuffle_files`` is not a ``bool``.
    """
    if not isinstance(need_shuffle_files, bool):
        # Wrap the value in a 1-tuple: a bare tuple on the right-hand side
        # of ``%`` is unpacked as format arguments and would raise TypeError
        # instead of the intended ValueError.
        raise ValueError(
            "In your config yaml, 'shuffle_filelist': %s must be written as a boolean type,such as True or False"
            % (need_shuffle_files, ))
    if need_shuffle_files:
        random.shuffle(filelist)
    return filelist
class
CostPrinter
(
object
):
"""
For count cost time && print cost log
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录