Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
0432b71b
M
models
项目概览
PaddlePaddle
/
models
接近 2 年 前同步成功
通知
230
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0432b71b
编写于
3月 30, 2020
作者:
C
chengmo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update tdm sampler
上级
432abdc0
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed file
with
57 addition
and
29 deletion
+57
-29
PaddleRec/tdm/tdm_demo/args.py
PaddleRec/tdm/tdm_demo/args.py
+9
-5
PaddleRec/tdm/tdm_demo/data/train/demo_fake_train_data.txt
PaddleRec/tdm/tdm_demo/data/train/demo_fake_train_data.txt
+1
-1
PaddleRec/tdm/tdm_demo/local_train.py
PaddleRec/tdm/tdm_demo/local_train.py
+11
-11
PaddleRec/tdm/tdm_demo/train_network.py
PaddleRec/tdm/tdm_demo/train_network.py
+20
-10
PaddleRec/tdm/tdm_demo/utils.py
PaddleRec/tdm/tdm_demo/utils.py
+16
-2
未找到文件。
PaddleRec/tdm/tdm_demo/args.py
浏览文件 @
0432b71b
...
@@ -73,7 +73,7 @@ def parse_args():
...
@@ -73,7 +73,7 @@ def parse_args():
"whether to perform local training"
)
"whether to perform local training"
)
main_g
.
add_arg
(
"is_cloud"
,
bool
,
False
,
""
)
main_g
.
add_arg
(
"is_cloud"
,
bool
,
False
,
""
)
main_g
.
add_arg
(
"is_test"
,
bool
,
False
,
""
)
main_g
.
add_arg
(
"is_test"
,
bool
,
False
,
""
)
main_g
.
add_arg
(
"sync_mode"
,
str
,
"async"
,
"distributed traing mode"
)
main_g
.
add_arg
(
"sync_mode"
,
str
,
"async"
,
"distributed traing mode"
)
main_g
.
add_arg
(
"need_trace"
,
bool
,
False
,
""
)
main_g
.
add_arg
(
"need_trace"
,
bool
,
False
,
""
)
main_g
.
add_arg
(
"need_detail"
,
bool
,
False
,
""
)
main_g
.
add_arg
(
"need_detail"
,
bool
,
False
,
""
)
...
@@ -89,7 +89,13 @@ def parse_args():
...
@@ -89,7 +89,13 @@ def parse_args():
model_g
.
add_arg
(
"node_nums"
,
int
,
26
,
"tree node nums"
)
model_g
.
add_arg
(
"node_nums"
,
int
,
26
,
"tree node nums"
)
model_g
.
add_arg
(
"node_emb_size"
,
int
,
64
,
"node embedding size"
)
model_g
.
add_arg
(
"node_emb_size"
,
int
,
64
,
"node embedding size"
)
model_g
.
add_arg
(
"query_emb_size"
,
int
,
768
,
"input query embedding size"
)
model_g
.
add_arg
(
"query_emb_size"
,
int
,
768
,
"input query embedding size"
)
model_g
.
add_arg
(
"neg_sampling_list"
,
list
,
[
1
,
2
,
3
,
4
],
"nce sample nums at every layer"
)
model_g
.
add_arg
(
"neg_sampling_list"
,
list
,
[
1
,
2
,
3
,
4
],
"nce sample nums at every layer"
)
model_g
.
add_arg
(
"layer_node_num_list"
,
list
,
[
2
,
4
,
7
,
12
],
"node nums at every layer"
)
model_g
.
add_arg
(
"leaf_node_num"
,
int
,
13
,
"leaf node nums"
)
# for infer
model_g
.
add_arg
(
"child_nums"
,
int
,
2
,
"child node of ancestor node"
)
model_g
.
add_arg
(
"child_nums"
,
int
,
2
,
"child node of ancestor node"
)
model_g
.
add_arg
(
"topK"
,
int
,
2
,
"best recall result nums"
)
model_g
.
add_arg
(
"topK"
,
int
,
2
,
"best recall result nums"
)
...
@@ -99,10 +105,9 @@ def parse_args():
...
@@ -99,10 +105,9 @@ def parse_args():
model_g
.
add_arg
(
"test_files_path"
,
str
,
"./data/test"
,
"test data path"
)
model_g
.
add_arg
(
"test_files_path"
,
str
,
"./data/test"
,
"test data path"
)
model_g
.
add_arg
(
"model_files_path"
,
str
,
"./models"
,
"model data path"
)
model_g
.
add_arg
(
"model_files_path"
,
str
,
"./models"
,
"model data path"
)
# build tree and warm up
# build tree and warm up
model_g
.
add_arg
(
"build_tree_init_path"
,
str
,
model_g
.
add_arg
(
"build_tree_init_path"
,
str
,
"./data/gen_tree/demo_fake_input.txt"
,
"build tree embedding path"
)
"./data/gen_tree/demo_fake_input.txt"
,
"build tree embedding path"
)
model_g
.
add_arg
(
"warm-up"
,
bool
,
False
,
model_g
.
add_arg
(
"warm-up"
,
bool
,
False
,
"warm up, builing new tree."
)
"warm up, builing new tree."
)
model_g
.
add_arg
(
"rebuild_tree_per_epochs"
,
int
,
-
1
,
model_g
.
add_arg
(
"rebuild_tree_per_epochs"
,
int
,
-
1
,
...
@@ -135,4 +140,3 @@ def print_arguments(args):
...
@@ -135,4 +140,3 @@ def print_arguments(args):
for
arg
,
value
in
sorted
(
six
.
iteritems
(
vars
(
args
))):
for
arg
,
value
in
sorted
(
six
.
iteritems
(
vars
(
args
))):
print
(
'%s: %s'
%
(
arg
,
value
))
print
(
'%s: %s'
%
(
arg
,
value
))
print
(
'------------------------------------------------'
)
print
(
'------------------------------------------------'
)
PaddleRec/tdm/tdm_demo/data/train/demo_fake_train_data.txt
浏览文件 @
0432b71b
此差异已折叠。
点击以展开。
PaddleRec/tdm/tdm_demo/local_train.py
浏览文件 @
0432b71b
...
@@ -71,15 +71,6 @@ def run_train(args):
...
@@ -71,15 +71,6 @@ def run_train(args):
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
exe
.
run
(
fluid
.
default_startup_program
())
# Set TDM_Tree Parameter
Numpy_model
=
{}
Numpy_model
[
'TDM_Tree_Travel'
]
=
tdm_model
.
travel_array
Numpy_model
[
'TDM_Tree_Layer'
]
=
tdm_model
.
layer_array
Numpy_model
[
'TDM_Tree_Info'
]
=
tdm_model
.
info_array
for
param_name
in
Numpy_model
:
param_t
=
fluid
.
global_scope
().
find_var
(
param_name
).
get_tensor
()
param_t
.
set
(
Numpy_model
[
str
(
param_name
)].
astype
(
'int32'
),
place
)
if
args
.
load_model
:
if
args
.
load_model
:
path
=
args
.
init_model_files_path
path
=
args
.
init_model_files_path
fluid
.
io
.
load_persistables
(
fluid
.
io
.
load_persistables
(
...
@@ -89,8 +80,17 @@ def run_train(args):
...
@@ -89,8 +80,17 @@ def run_train(args):
lr
=
fluid
.
global_scope
().
find_var
(
"learning_rate_0"
).
get_tensor
()
lr
=
fluid
.
global_scope
().
find_var
(
"learning_rate_0"
).
get_tensor
()
lr
.
set
(
np
.
array
(
args
.
learning_rate
).
astype
(
'float32'
),
place
)
lr
.
set
(
np
.
array
(
args
.
learning_rate
).
astype
(
'float32'
),
place
)
logger
.
info
(
"Load persistables from
\"
{}
\"
"
.
format
(
path
))
logger
.
info
(
"Load persistables from
\"
{}
\"
"
.
format
(
path
))
else
:
if
args
.
save_init_model
:
# Set TDM_Tree Parameter
Numpy_model
=
{}
Numpy_model
[
'TDM_Tree_Travel'
]
=
tdm_model
.
tdm_sampler_prepare_dict
[
'travel_array'
]
Numpy_model
[
'TDM_Tree_Layer'
]
=
tdm_model
.
tdm_sampler_prepare_dict
[
'layer_array'
]
Numpy_model
[
'TDM_Tree_Info'
]
=
tdm_model
.
info_array
for
param_name
in
Numpy_model
:
param_t
=
fluid
.
global_scope
().
find_var
(
param_name
).
get_tensor
()
param_t
.
set
(
Numpy_model
[
str
(
param_name
)].
astype
(
'int32'
),
place
)
if
args
.
save_init_model
or
not
args
.
load_model
:
logger
.
info
(
"Begin Save Init model."
)
logger
.
info
(
"Begin Save Init model."
)
model_path
=
os
.
path
.
join
(
args
.
model_files_path
,
"init_model"
)
model_path
=
os
.
path
.
join
(
args
.
model_files_path
,
"init_model"
)
fluid
.
io
.
save_persistables
(
executor
=
exe
,
dirname
=
model_path
)
fluid
.
io
.
save_persistables
(
executor
=
exe
,
dirname
=
model_path
)
...
...
PaddleRec/tdm/tdm_demo/train_network.py
浏览文件 @
0432b71b
...
@@ -32,13 +32,23 @@ class TdmTrainNet(object):
...
@@ -32,13 +32,23 @@ class TdmTrainNet(object):
self
.
max_layers
=
args
.
layer_size
self
.
max_layers
=
args
.
layer_size
self
.
neg_sampling_list
=
args
.
neg_sampling_list
self
.
neg_sampling_list
=
args
.
neg_sampling_list
self
.
output_positive
=
True
self
.
output_positive
=
True
self
.
travel_list
,
self
.
travel_array
,
self
.
layer_list
,
self
.
layer_array
=
tdm_sampler_prepare
(
args
)
self
.
info_list
,
self
.
info_array
=
tdm_child_prepare
(
args
)
self
.
need_trace
=
args
.
need_trace
self
.
need_trace
=
args
.
need_trace
self
.
need_detail
=
args
.
need_detail
self
.
need_detail
=
args
.
need_detail
if
not
args
.
load_model
:
self
.
tdm_sampler_prepare_dict
=
tdm_sampler_prepare
(
args
)
print
(
"--Layer node num list--: {}"
.
format
(
self
.
tdm_sampler_prepare_dict
[
'layer_node_num_list'
]))
self
.
layer_node_num_list
=
self
.
tdm_sampler_prepare_dict
[
'layer_node_num_list'
]
print
(
"--leaf node num--: {}"
.
format
(
self
.
tdm_sampler_prepare_dict
[
'leaf_node_num'
]))
self
.
leaf_node_num
=
self
.
tdm_sampler_prepare_dict
[
'leaf_node_num'
]
self
.
info_array
=
tdm_child_prepare
(
args
)
else
:
self
.
layer_node_num_list
=
args
.
layer_node_num_list
self
.
leaf_node_num
=
args
.
leaf_node_num
self
.
get_tree_info
(
args
)
self
.
get_tree_info
(
args
)
self
.
input_trans_layer
=
InputTransNet
(
args
)
self
.
input_trans_layer
=
InputTransNet
(
args
)
self
.
layer_classifier
=
DnnLayerClassifierNet
(
args
)
self
.
layer_classifier
=
DnnLayerClassifierNet
(
args
)
...
@@ -76,19 +86,19 @@ class TdmTrainNet(object):
...
@@ -76,19 +86,19 @@ class TdmTrainNet(object):
trace_var
(
item_label
,
"[TDM][inputs]"
,
trace_var
(
item_label
,
"[TDM][inputs]"
,
"item_label"
,
self
.
need_trace
,
self
.
need_detail
)
"item_label"
,
self
.
need_trace
,
self
.
need_detail
)
sample_nodes
,
sample_label
,
sample_mask
=
fluid
.
layers
.
tdm_sampler
(
sample_nodes
,
sample_label
,
sample_mask
=
fluid
.
contrib
.
layers
.
tdm_sampler
(
input
=
item_label
,
x
=
item_label
,
neg_samples_num_list
=
self
.
neg_sampling_list
,
neg_samples_num_list
=
self
.
neg_sampling_list
,
tree_travel_list
=
self
.
travel
_list
,
layer_node_num_list
=
self
.
layer_node_num
_list
,
tree_layer_list
=
self
.
layer_list
,
leaf_node_num
=
self
.
leaf_node_num
,
tree_travel_attr
=
fluid
.
ParamAttr
(
name
=
"TDM_Tree_Travel"
),
tree_travel_attr
=
fluid
.
ParamAttr
(
name
=
"TDM_Tree_Travel"
),
tree_layer_attr
=
fluid
.
ParamAttr
(
name
=
"TDM_Tree_Layer"
),
tree_layer_attr
=
fluid
.
ParamAttr
(
name
=
"TDM_Tree_Layer"
),
output_labels
=
True
,
output_positive
=
self
.
output_positive
,
output_positive
=
self
.
output_positive
,
output_list
=
True
,
output_list
=
True
,
seed
=
0
,
seed
=
0
,
dtype
=
'int
64
'
dtype
=
'int
32
'
)
)
trace_var
(
sample_nodes
,
"[TDM][tdm_sample]"
,
trace_var
(
sample_nodes
,
"[TDM][tdm_sample]"
,
"sample_nodes"
,
self
.
need_trace
,
self
.
need_detail
)
"sample_nodes"
,
self
.
need_trace
,
self
.
need_detail
)
trace_var
(
sample_label
,
"[TDM][tdm_sample]"
,
trace_var
(
sample_label
,
"[TDM][tdm_sample]"
,
...
...
PaddleRec/tdm/tdm_demo/utils.py
浏览文件 @
0432b71b
...
@@ -59,18 +59,32 @@ def read_layer_list(path):
...
@@ -59,18 +59,32 @@ def read_layer_list(path):
def
tdm_sampler_prepare
(
args
):
def
tdm_sampler_prepare
(
args
):
"""load tdm tree param from list file"""
"""load tdm tree param from list file"""
prepare_dict
=
{}
travel_list
=
read_list
(
args
.
tree_travel_init_path
)
travel_list
=
read_list
(
args
.
tree_travel_init_path
)
travel_array
=
np
.
array
(
travel_list
)
travel_array
=
np
.
array
(
travel_list
)
prepare_dict
[
'travel_array'
]
=
travel_array
leaf_num
=
len
(
travel_list
)
prepare_dict
[
'leaf_node_num'
]
=
leaf_num
layer_list
,
layer_array
=
read_layer_list
(
args
.
tree_layer_init_path
)
layer_list
,
layer_array
=
read_layer_list
(
args
.
tree_layer_init_path
)
return
[
travel_list
,
travel_array
,
layer_list
,
layer_array
]
prepare_dict
[
'layer_array'
]
=
layer_array
layer_node_num_list
=
[
len
(
i
)
for
i
in
layer_list
]
prepare_dict
[
'layer_node_num_list'
]
=
layer_node_num_list
node_num
=
int
(
np
.
sum
(
layer_node_num_list
))
prepare_dict
[
'node_num'
]
=
node_num
return
prepare_dict
def
tdm_child_prepare
(
args
):
def
tdm_child_prepare
(
args
):
"""load tdm tree param from list file"""
"""load tdm tree param from list file"""
info_list
=
read_list
(
args
.
tree_info_init_path
)
info_list
=
read_list
(
args
.
tree_info_init_path
)
info_array
=
np
.
array
(
info_list
)
info_array
=
np
.
array
(
info_list
)
return
info_
list
,
info_
array
return
info_array
def
trace_var
(
var
,
msg_prefix
,
var_name
,
need_trace
=
False
,
need_detail
=
False
):
def
trace_var
(
var
,
msg_prefix
,
var_name
,
need_trace
=
False
,
need_detail
=
False
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录