PaddlePaddle / models
Commit c34bd511
Authored Jan 10, 2019 by zhangwenhui03
Parent: 8f43b4fa

fix style
6 changed files with 193 additions and 121 deletions:
fluid/PaddleRec/gru4rec/README.md            +12   −7
fluid/PaddleRec/gru4rec/infer_sample_neg.py   +1   −1
fluid/PaddleRec/gru4rec/net.py              +171   −5
fluid/PaddleRec/gru4rec/net_bpr.py            +0 −103
fluid/PaddleRec/gru4rec/train.py              +1   −2
fluid/PaddleRec/gru4rec/train_sample_neg.py   +8   −3
fluid/PaddleRec/gru4rec/README.md

@@ -5,12 +5,11 @@
 ```text
 .
 ├── README.md            # documentation
-├── train.py             # training script, cross-entropy loss
-├── train_bpr.py         # training script, bpr loss
-├── infer.py             # inference script, cross-entropy loss
-├── infer_bpr.py         # inference script, bpr loss
-├── net.py               # network structure, cross-entropy loss
-├── net_bpr.py           # network structure, bpr loss
+├── train.py             # training script, full-vocabulary cross-entropy
+├── train_sample_neg.py  # training script, sampled negatives (bpr loss and cross-entropy)
+├── infer.py             # inference script, full vocabulary
+├── infer_sample_neg.py  # inference script, sampled negatives
+├── net.py               # network structure
 ├── text2paddle.py       # converts text data to paddle format
 ├── cluster_train.py     # distributed training
 ├── cluster_train.sh     # distributed training script

@@ -33,6 +32,9 @@ The GRU4REC model is introduced in the paper [Session-based Recommendations with Recurrent Neural Networks]
+Session-based recommendation applies to a wide range of sequence data, such as a user's product views, news clicks, or location check-ins.
+Three loss functions are supported: full-vocabulary cross-entropy, Bayesian Pairwise Ranking with sampled negatives, and cross-entropy with sampled negatives.
+To run the sample program, the section "RSC15 data download and preprocessing" can be skipped.

 ## RSC15 data download and preprocessing

@@ -129,7 +131,10 @@ CPU environment
 python train.py --train_dir train_data/
 ```
-Training with bayesian pairwise ranking loss (bpr loss) follows the same format as cross-entropy.
+Training with bayesian pairwise ranking loss (bpr loss):
+```
+CUDA_VISIBLE_DEVICES=0 python train_sample_neg.py --loss bpr --use_cuda 1
+```
 Note that when running a single-machine multi-card job in a CPU environment (--parallel 1), batch_size should be greater than the number of CPU cores.
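As a reference for the loss the README now mentions (not part of the commit itself): Paddle's `bpr_loss` layer implements the Bayesian Pairwise Ranking objective, which for each step compares the positive item's score against each sampled negative, roughly

$$
L_{\mathrm{bpr}} = -\frac{1}{N_{\mathrm{neg}}}\sum_{j \in \mathrm{neg}} \log \sigma\!\left(\hat{y}_{\mathrm{pos}} - \hat{y}_{j}\right),
\qquad \sigma(x) = \frac{1}{1+e^{-x}},
$$

where the $\hat{y}$ are the `neg_size + 1` candidate scores produced by the network; minimizing it pushes the positive item's score above every sampled negative.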
fluid/PaddleRec/gru4rec/infer_bpr.py → fluid/PaddleRec/gru4rec/infer_sample_neg.py (renamed)

@@ -8,7 +8,7 @@ import numpy as np
 import six
 import paddle.fluid as fluid
 import paddle
-import net_bpr as net
+import net
 import utils
fluid/PaddleRec/gru4rec/net.py

The former `network` function is renamed to `all_vocab_network`; its body is unchanged:

 import paddle.fluid as fluid


-def network(vocab_size,
-            hid_size=100,
-            init_low_bound=-0.04,
-            init_high_bound=0.04):
+def all_vocab_network(vocab_size,
+                      hid_size=100,
+                      init_low_bound=-0.04,
+                      init_high_bound=0.04):
     """ network definition """
     emb_lr_x = 10.0
     gru_lr_x = 1.0

@@ -43,8 +44,173 @@ def network(vocab_size,
             initializer=fluid.initializer.Uniform(
                 low=init_low_bound, high=init_high_bound),
             learning_rate=fc_lr_x))
     cost = fluid.layers.cross_entropy(input=fc, label=dst_wordseq)
     acc = fluid.layers.accuracy(input=fc, label=dst_wordseq, k=20)
     avg_cost = fluid.layers.mean(x=cost)
     return src_wordseq, dst_wordseq, avg_cost, acc

The hunk then adds three functions: `train_bpr_network`, `train_cross_entropy_network`, and `infer_bpr_network`.
`train_bpr_network` (added):

```python
def train_bpr_network(vocab_size, neg_size, hid_size, drop_out=0.2):
    """ network definition """
    emb_lr_x = 1.0
    gru_lr_x = 1.0
    fc_lr_x = 1.0

    # Input data
    src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
    pos_label = fluid.layers.data(
        name="pos_label", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(
        name="label", shape=[neg_size + 1], dtype="int64", lod_level=1)

    emb_src = fluid.layers.embedding(
        input=src,
        size=[vocab_size, hid_size],
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=emb_lr_x))
    emb_src_drop = fluid.layers.dropout(emb_src, dropout_prob=drop_out)

    fc0 = fluid.layers.fc(input=emb_src_drop,
                          size=hid_size * 3,
                          param_attr=fluid.ParamAttr(
                              name="gru_fc",
                              initializer=fluid.initializer.XavierInitializer(),
                              learning_rate=gru_lr_x),
                          bias_attr=False)
    gru_h0 = fluid.layers.dynamic_gru(
        input=fc0,
        size=hid_size,
        param_attr=fluid.ParamAttr(
            name="dy_gru.param",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=gru_lr_x),
        bias_attr="dy_gru.bias")
    gru_h0_drop = fluid.layers.dropout(gru_h0, dropout_prob=drop_out)

    label_re = fluid.layers.sequence_reshape(input=label, new_dim=1)
    emb_label = fluid.layers.embedding(
        input=label_re,
        size=[vocab_size, hid_size],
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=emb_lr_x))
    emb_label_drop = fluid.layers.dropout(emb_label, dropout_prob=drop_out)

    gru_exp = fluid.layers.expand(
        x=gru_h0_drop, expand_times=[1, (neg_size + 1)])
    gru = fluid.layers.sequence_reshape(input=gru_exp, new_dim=hid_size)
    ele_mul = fluid.layers.elementwise_mul(emb_label_drop, gru)
    red_sum = fluid.layers.reduce_sum(input=ele_mul, dim=1, keep_dim=True)
    pre = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))

    cost = fluid.layers.bpr_loss(input=pre, label=pos_label)
    cost_sum = fluid.layers.reduce_sum(input=cost)
    return src, pos_label, label, cost_sum
```
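The `expand` / `sequence_reshape` / `elementwise_mul` / `reduce_sum` chain above is the sampled-negative scoring step. A minimal NumPy sketch (illustrative only, not part of the commit; toy sizes) of what it computes for one time step:

```python
import numpy as np

neg_size, hid_size = 10, 100
rng = np.random.default_rng(0)
h = rng.standard_normal(hid_size)                     # one GRU hidden state
cand = rng.standard_normal((neg_size + 1, hid_size))  # embeddings of the positive + neg_size sampled items

# The layer chain tiles h (neg_size + 1) times, multiplies elementwise with
# the candidate embeddings, and sums over the hidden dimension ...
tiled = np.tile(h, (neg_size + 1, 1))
scores_chain = (tiled * cand).sum(axis=1)

# ... which is simply the dot product of h with every candidate:
scores_direct = cand @ h
assert np.allclose(scores_chain, scores_direct)       # (neg_size + 1,) scores, fed to the loss
```

Scoring only `neg_size + 1` candidates per step, instead of the whole vocabulary, is what makes this training path cheap.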
`train_cross_entropy_network` (added) is identical to `train_bpr_network` line for line, except that the final loss layer is a cross-entropy over the same `neg_size + 1` scores:

```python
def train_cross_entropy_network(vocab_size, neg_size, hid_size, drop_out=0.2):
    """ network definition """
    # ... body identical to train_bpr_network above, up to the loss layer ...
    cost = fluid.layers.cross_entropy(input=pre, label=pos_label)
    cost_sum = fluid.layers.reduce_sum(input=cost)
    return src, pos_label, label, cost_sum
```
`infer_bpr_network` (added) scores every vocabulary item against the GRU state and reports top-20 accuracy (recall@20):

```python
def infer_bpr_network(vocab_size, batch_size, hid_size, dropout=0.2):
    src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
    emb_src = fluid.layers.embedding(
        input=src, size=[vocab_size, hid_size], param_attr="emb")
    emb_src_drop = fluid.layers.dropout(
        emb_src, dropout_prob=dropout, is_test=True)

    fc0 = fluid.layers.fc(input=emb_src_drop,
                          size=hid_size * 3,
                          param_attr="gru_fc",
                          bias_attr=False)
    gru_h0 = fluid.layers.dynamic_gru(
        input=fc0,
        size=hid_size,
        param_attr="dy_gru.param",
        bias_attr="dy_gru.bias")
    gru_h0_drop = fluid.layers.dropout(
        gru_h0, dropout_prob=dropout, is_test=True)

    all_label = fluid.layers.data(
        name="all_label",
        shape=[vocab_size, 1],
        dtype="int64",
        append_batch_size=False)
    emb_all_label = fluid.layers.embedding(
        input=all_label, size=[vocab_size, hid_size], param_attr="emb")
    emb_all_label_drop = fluid.layers.dropout(
        emb_all_label, dropout_prob=dropout, is_test=True)

    all_pre = fluid.layers.matmul(
        gru_h0_drop, emb_all_label_drop, transpose_y=True)
    pos_label = fluid.layers.data(
        name="pos_label", shape=[1], dtype="int64", lod_level=1)
    acc = fluid.layers.accuracy(input=all_pre, label=pos_label, k=20)
    return acc
```
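For contrast with the sampled-negative path, a NumPy sketch (illustrative only; toy sizes) of the full-vocabulary scoring that `infer_bpr_network` performs via `matmul(..., transpose_y=True)` and `accuracy(..., k=20)`:

```python
import numpy as np

T, V, H = 4, 1000, 100                # steps, vocab size, hidden size (toy values)
rng = np.random.default_rng(0)
hidden = rng.standard_normal((T, H))  # GRU states, one per step
emb = rng.standard_normal((V, H))     # the shared "emb" table

scores = hidden @ emb.T               # matmul with transpose_y=True: (T, V)
top20 = np.argsort(-scores, axis=1)[:, :20]

# accuracy(input=all_pre, label=pos_label, k=20) amounts to recall@20:
pos = rng.integers(0, V, size=T)      # true next items
recall20 = np.mean([pos[t] in top20[t] for t in range(T)])
print(recall20)
```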
fluid/PaddleRec/gru4rec/net_bpr.py (deleted, 100644 → 0)

The deleted file defined `train_network(vocab_size, neg_size, hid_size, drop_out=0.2)` and `infer_network(vocab_size, batch_size, hid_size, dropout=0.2)`, whose bodies match the `train_bpr_network` and `infer_bpr_network` functions just added to net.py; the commit consolidates both loss variants into a single module.
fluid/PaddleRec/gru4rec/train.py

@@ -63,7 +63,7 @@ def train():
         buffer_size=1000, word_freq_threshold=0, is_train=True)

     # Train program
-    src_wordseq, dst_wordseq, avg_cost, acc = net.network(
+    src_wordseq, dst_wordseq, avg_cost, acc = net.all_vocab_network(
         vocab_size=vocab_size, hid_size=hid_size)

     # Optimization to minimize lost

@@ -117,7 +117,6 @@ def train():
     fetch_vars = [avg_cost, acc]
     fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
     print("model saved in %s" % save_dir)
-    #exe.close()
     print("finish training")
fluid/PaddleRec/gru4rec/train_bpr.py → fluid/PaddleRec/gru4rec/train_sample_neg.py (renamed)

@@ -9,7 +9,7 @@ import paddle.fluid as fluid
 import paddle
 import time
 import utils
-import net_bpr as net
+import net

 SEED = 102

@@ -26,6 +26,7 @@ def parse_args():
         '--hid_size', type=int, default=100, help='hidden-dim size')
     parser.add_argument(
         '--neg_size', type=int, default=10, help='neg item size')
     parser.add_argument(
+        '--loss', type=str, default="bpr", help='loss fuction')
+    parser.add_argument(
         '--model_dir', type=str, default='model_bpr_recall20', help='model dir')
     parser.add_argument(

@@ -65,8 +66,12 @@ def train():
         buffer_size=1000, word_freq_threshold=0, is_train=True)

     # Train program
-    src, pos_label, label, avg_cost = net.train_network(
-        neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
+    if args.loss == 'bpr':
+        src, pos_label, label, avg_cost = net.train_bpr_network(
+            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
+    else:
+        src, pos_label, label, avg_cost = net.train_cross_entropy_network(
+            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)

     # Optimization to minimize lost
     sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
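With this dispatch, `--loss bpr` (the default) builds the BPR network, while any other value falls through to the sampled-negative cross-entropy network; for example, passing something like `--loss ce` (a hypothetical value, since any non-`bpr` string takes the else branch) would pair with the cross-entropy training the README describes.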