Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
ca1c4695
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ca1c4695
编写于
5月 28, 2020
作者:
X
xjqbest
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix
上级
07bd7092
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
42 addition
and
26 deletion
+42
-26
core/reader.py
core/reader.py
+0
-3
core/trainers/single_trainer.py
core/trainers/single_trainer.py
+29
-15
core/utils/dataloader_instance.py
core/utils/dataloader_instance.py
+3
-2
core/utils/envs.py
core/utils/envs.py
+2
-0
models/rank/dnn/model.py
models/rank/dnn/model.py
+6
-3
run.py
run.py
+2
-3
未找到文件。
core/reader.py
浏览文件 @
ca1c4695
...
...
@@ -35,7 +35,6 @@ class Reader(dg.MultiSlotDataGenerator):
else
:
raise
ValueError
(
"reader config only support yaml"
)
@
abc
.
abstractmethod
def
init
(
self
):
"""init"""
...
...
@@ -56,8 +55,6 @@ class SlotReader(dg.MultiSlotDataGenerator):
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
#envs.set_global_envs(_config)
#envs.update_workspace()
def
init
(
self
,
sparse_slots
,
dense_slots
,
padding
=
0
):
from
operator
import
mul
...
...
core/trainers/single_trainer.py
浏览文件 @
ca1c4695
...
...
@@ -69,13 +69,14 @@ class SingleTrainer(TranspileTrainer):
reader
=
os
.
path
.
join
(
abs_dir
,
'../utils'
,
'dataset_instance.py'
)
if
sparse_slots
is
None
and
dense_slots
is
None
:
pipe_cmd
=
"python {} {} {} {}"
.
format
(
reader
,
reader_class
,
"TRAIN"
,
self
.
_config_yaml
)
pipe_cmd
=
"python {} {} {} {}"
.
format
(
reader
,
reader_class
,
"TRAIN"
,
self
.
_config_yaml
)
else
:
if
sparse_slots
is
None
:
sparse_slots
=
"#"
if
dense_slots
is
None
:
dense_slots
=
"#"
padding
=
envs
.
get_global_env
(
name
+
"padding"
,
0
)
padding
=
envs
.
get_global_env
(
name
+
"padding"
,
0
)
pipe_cmd
=
"python {} {} {} {} {} {} {} {}"
.
format
(
reader
,
"slot"
,
"slot"
,
self
.
_config_yaml
,
"fake"
,
\
sparse_slots
.
replace
(
" "
,
"#"
),
dense_slots
.
replace
(
" "
,
"#"
),
str
(
padding
))
...
...
@@ -145,19 +146,29 @@ class SingleTrainer(TranspileTrainer):
scope
=
fluid
.
Scope
()
dataset_name
=
model_dict
[
"dataset_name"
]
opt_name
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.class"
)
opt_lr
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.learning_rate"
)
opt_strategy
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.strategy"
)
opt_lr
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.learning_rate"
)
opt_strategy
=
envs
.
get_global_env
(
"hyper_parameters.optimizer.strategy"
)
with
fluid
.
program_guard
(
train_program
,
startup_program
):
with
fluid
.
unique_name
.
guard
():
with
fluid
.
scope_guard
(
scope
):
model_path
=
model_dict
[
"model"
].
replace
(
"{workspace}"
,
envs
.
path_adapter
(
self
.
_env
[
"workspace"
]))
model
=
envs
.
lazy_instance_by_fliename
(
model_path
,
"Model"
)(
self
.
_env
)
model
.
_data_var
=
model
.
input_data
(
dataset_name
=
model_dict
[
"dataset_name"
])
if
envs
.
get_global_env
(
"dataset."
+
dataset_name
+
".type"
)
==
"DataLoader"
:
model_path
=
model_dict
[
"model"
].
replace
(
"{workspace}"
,
envs
.
path_adapter
(
self
.
_env
[
"workspace"
]))
model
=
envs
.
lazy_instance_by_fliename
(
model_path
,
"Model"
)(
self
.
_env
)
model
.
_data_var
=
model
.
input_data
(
dataset_name
=
model_dict
[
"dataset_name"
])
if
envs
.
get_global_env
(
"dataset."
+
dataset_name
+
".type"
)
==
"DataLoader"
:
model
.
_init_dataloader
()
self
.
_get_dataloader
(
dataset_name
,
model
.
_data_loader
)
model
.
net
(
model
.
_data_var
,
is_infer
=
model_dict
.
get
(
"is_infer"
,
False
))
optimizer
=
model
.
_build_optimizer
(
opt_name
,
opt_lr
,
opt_strategy
)
self
.
_get_dataloader
(
dataset_name
,
model
.
_data_loader
)
model
.
net
(
model
.
_data_var
,
is_infer
=
model_dict
.
get
(
"is_infer"
,
False
))
optimizer
=
model
.
_build_optimizer
(
opt_name
,
opt_lr
,
opt_strategy
)
optimizer
.
minimize
(
model
.
_cost
)
self
.
_model
[
model_dict
[
"name"
]][
0
]
=
train_program
self
.
_model
[
model_dict
[
"name"
]][
1
]
=
startup_program
...
...
@@ -167,13 +178,14 @@ class SingleTrainer(TranspileTrainer):
for
dataset
in
self
.
_env
[
"dataset"
]:
if
dataset
[
"type"
]
!=
"DataLoader"
:
self
.
_dataset
[
dataset
[
"name"
]]
=
self
.
_create_dataset
(
dataset
[
"name"
])
self
.
_dataset
[
dataset
[
"name"
]]
=
self
.
_create_dataset
(
dataset
[
"name"
])
context
[
'status'
]
=
'startup_pass'
def
startup
(
self
,
context
):
for
model_dict
in
self
.
_env
[
"executor"
]:
with
fluid
.
scope_guard
(
self
.
_model
[
model_dict
[
"name"
]][
2
]):
with
fluid
.
scope_guard
(
self
.
_model
[
model_dict
[
"name"
]][
2
]):
self
.
_exe
.
run
(
self
.
_model
[
model_dict
[
"name"
]][
1
])
context
[
'status'
]
=
'train_pass'
...
...
@@ -289,7 +301,8 @@ class SingleTrainer(TranspileTrainer):
return
epoch_id
%
epoch_interval
==
0
def
save_inference_model
():
save_interval
=
envs
.
get_global_env
(
"epoch.save_inference_interval"
,
-
1
)
save_interval
=
int
(
envs
.
get_global_env
(
"epoch.save_inference_interval"
,
-
1
)
if
not
need_save
(
epoch_id
,
save_interval
,
False
):
return
feed_varnames
=
envs
.
get_global_env
(
"epoch.save_inference_feed_varnames"
,
None
)
...
...
@@ -313,7 +326,8 @@ class SingleTrainer(TranspileTrainer):
fetch_vars
,
self
.
_exe
)
def
save_persistables
():
save_interval
=
int
(
envs
.
get_global_env
(
"epoch.save_checkpoint_interval"
,
-
1
))
save_interval
=
int
(
envs
.
get_global_env
(
"epoch.save_checkpoint_interval"
,
-
1
))
if
not
need_save
(
epoch_id
,
save_interval
,
False
):
return
dirname
=
envs
.
get_global_env
(
"epoch.save_checkpoint_path"
,
None
)
...
...
core/utils/dataloader_instance.py
浏览文件 @
ca1c4695
...
...
@@ -19,6 +19,7 @@ from paddlerec.core.utils.envs import get_global_env
from
paddlerec.core.utils.envs
import
get_runtime_environ
from
paddlerec.core.reader
import
SlotReader
def
dataloader_by_name
(
readerclass
,
dataset_name
,
yaml_file
):
reader_class
=
lazy_instance_by_fliename
(
readerclass
,
"TrainReader"
)
name
=
"dataset."
+
dataset_name
+
"."
...
...
@@ -30,9 +31,9 @@ def dataloader_by_name(readerclass, dataset_name, yaml_file):
data_path
=
os
.
path
.
join
(
package_base
,
data_path
.
split
(
"::"
)[
1
])
files
=
[
str
(
data_path
)
+
"/%s"
%
x
for
x
in
os
.
listdir
(
data_path
)]
reader
=
reader_class
(
yaml_file
)
reader
.
init
()
def
gen_reader
():
for
file
in
files
:
with
open
(
file
,
'r'
)
as
f
:
...
...
@@ -67,7 +68,6 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file):
data_path
=
os
.
path
.
join
(
package_base
,
data_path
.
split
(
"::"
)[
1
])
files
=
[
str
(
data_path
)
+
"/%s"
%
x
for
x
in
os
.
listdir
(
data_path
)]
sparse
=
get_global_env
(
name
+
"sparse_slots"
)
dense
=
get_global_env
(
name
+
"dense_slots"
)
padding
=
get_global_env
(
name
+
"padding"
,
0
)
...
...
@@ -96,6 +96,7 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file):
return
gen_batch_reader
()
return
gen_reader
def
dataloader
(
readerclass
,
train
,
yaml_file
):
if
train
==
"TRAIN"
:
reader_name
=
"TrainReader"
...
...
core/utils/envs.py
浏览文件 @
ca1c4695
...
...
@@ -20,6 +20,7 @@ import sys
global_envs
=
{}
def
flatten_environs
(
envs
,
separator
=
"."
):
flatten_dict
=
{}
assert
isinstance
(
envs
,
dict
)
...
...
@@ -81,6 +82,7 @@ def set_global_envs(envs):
fatten_env_namespace
([],
envs
)
def
get_global_env
(
env_name
,
default_value
=
None
,
namespace
=
None
):
"""
get os environment value
...
...
models/rank/dnn/model.py
浏览文件 @
ca1c4695
...
...
@@ -27,9 +27,12 @@ class Model(ModelBase):
def
_init_hyper_parameters
(
self
):
self
.
is_distributed
=
True
if
envs
.
get_trainer
(
)
==
"CtrTrainer"
else
False
self
.
sparse_feature_number
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_number"
)
self
.
sparse_feature_dim
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
)
self
.
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.learning_rate"
)
self
.
sparse_feature_number
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_number"
)
self
.
sparse_feature_dim
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
)
self
.
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.learning_rate"
)
def
net
(
self
,
input
,
is_infer
=
False
):
self
.
sparse_inputs
=
self
.
_sparse_data_var
[
1
:]
...
...
run.py
浏览文件 @
ca1c4695
...
...
@@ -68,10 +68,8 @@ def get_engine(args):
if
engine
is
None
:
engine
=
run_extras
.
get
(
"epoch.trainer_class"
,
None
)
if
engine
is
None
:
engine
=
"single"
engine
=
"single"
engine
=
engine
.
upper
()
if
engine
not
in
engine_choices
:
raise
ValueError
(
"train.engin can not be chosen in {}"
.
format
(
engine_choices
))
...
...
@@ -135,6 +133,7 @@ def single_engine(args):
trainer
=
TrainerFactory
.
create
(
args
.
model
)
return
trainer
def
cluster_engine
(
args
):
def
update_workspace
(
cluster_envs
):
workspace
=
cluster_envs
.
get
(
"engine_workspace"
,
None
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录