Commit c34bd511 authored by Z zhangwenhui03

fix style

Parent 8f43b4fa
@@ -5,12 +5,11 @@
```text
.
├── README.md               # documentation
-├── train.py               # training script: cross-entropy loss
+├── train.py               # training script: full-vocabulary cross-entropy
-├── train_bpr.py           # training script: bpr loss
+├── train_sample_neg.py    # training script: sampled negatives, bpr loss and cross-entropy
-├── infer.py               # inference script: cross-entropy loss
+├── infer.py               # inference script: full vocabulary
-├── infer_bpr.py           # inference script: bpr loss
+├── infer_sample_neg.py    # inference script: sampled negatives
-├── net.py                 # network definition: cross-entropy loss
+├── net.py                 # network definition
-├── net_bpr.py             # network definition: bpr loss
├── text2paddle.py          # convert text data to paddle format
├── cluster_train.py        # distributed training
├── cluster_train.sh        # distributed training shell script
@@ -33,6 +32,9 @@ For an introduction to the GRU4REC model, see the paper [Session-based Recommendations with Recu
session-based recommendation covers a wide range of sequence data, such as users' product views, news clicks, and location check-ins.
+Three loss functions are supported: full-vocabulary cross-entropy, Bayesian Pairwise Ranking over sampled negatives, and cross-entropy over sampled negatives.
To run the sample program, you can skip the 'RSC15 data download and preprocessing' section.
## RSC15 data download and preprocessing
@@ -129,7 +131,10 @@ CPU environment
python train.py --train_dir train_data/
```
-bayesian pairwise ranking loss (bpr loss) training uses the same format as cross-entropy.
+bayesian pairwise ranking loss (bpr loss) training:
+```
+CUDA_VISIBLE_DEVICES=0 python train_sample_neg.py --loss bpr --use_cuda 1
+```
Note: when running single-machine multi-card training on CPU (--parallel 1), batch_size should be greater than the number of CPU cores.
......
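For reference, the BPR objective trained by train_sample_neg.py compares the positive item's score against each of the neg_size sampled negatives. A standard formulation, given here as a sketch (the exact reduction inside fluid.layers.bpr_loss may differ in detail), is:

$$
\mathcal{L}_{\mathrm{BPR}} = -\frac{1}{N}\sum_{j=1}^{N} \log \sigma\!\left(\hat{y}_{\mathrm{pos}} - \hat{y}_{\mathrm{neg}_j}\right)
$$

where the $\hat{y}$ are the dot-product scores produced by the network, $N$ is neg_size, and $\sigma$ is the sigmoid function.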
@@ -8,7 +8,7 @@ import numpy as np
import six
import paddle.fluid as fluid
import paddle
-import net_bpr as net
+import net
import utils
......
import paddle.fluid as fluid
-def network(vocab_size,
-            hid_size=100,
-            init_low_bound=-0.04,
-            init_high_bound=0.04):
+def all_vocab_network(vocab_size,
+                      hid_size=100,
+                      init_low_bound=-0.04,
+                      init_high_bound=0.04):
    """ network definition """
    emb_lr_x = 10.0
    gru_lr_x = 1.0
@@ -43,8 +44,173 @@ def network(vocab_size,
                      initializer=fluid.initializer.Uniform(
                          low=init_low_bound, high=init_high_bound),
                      learning_rate=fc_lr_x))
    cost = fluid.layers.cross_entropy(input=fc, label=dst_wordseq)
    acc = fluid.layers.accuracy(input=fc, label=dst_wordseq, k=20)
    avg_cost = fluid.layers.mean(x=cost)
    return src_wordseq, dst_wordseq, avg_cost, acc
def train_bpr_network(vocab_size, neg_size, hid_size, drop_out=0.2):
""" network definition """
emb_lr_x = 1.0
gru_lr_x = 1.0
fc_lr_x = 1.0
# Input data
src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
pos_label = fluid.layers.data(
name="pos_label", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(
name="label", shape=[neg_size + 1], dtype="int64", lod_level=1)
emb_src = fluid.layers.embedding(
input=src,
size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
name="emb",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=emb_lr_x))
emb_src_drop = fluid.layers.dropout(emb_src, dropout_prob=drop_out)
fc0 = fluid.layers.fc(input=emb_src_drop,
size=hid_size * 3,
param_attr=fluid.ParamAttr(
name="gru_fc",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=gru_lr_x),
bias_attr=False)
gru_h0 = fluid.layers.dynamic_gru(
input=fc0,
size=hid_size,
param_attr=fluid.ParamAttr(
name="dy_gru.param",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=gru_lr_x),
bias_attr="dy_gru.bias")
gru_h0_drop = fluid.layers.dropout(gru_h0, dropout_prob=drop_out)
label_re = fluid.layers.sequence_reshape(input=label, new_dim=1)
emb_label = fluid.layers.embedding(
input=label_re,
size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
name="emb",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=emb_lr_x))
emb_label_drop = fluid.layers.dropout(emb_label, dropout_prob=drop_out)
gru_exp = fluid.layers.expand(
x=gru_h0_drop, expand_times=[1, (neg_size + 1)])
gru = fluid.layers.sequence_reshape(input=gru_exp, new_dim=hid_size)
ele_mul = fluid.layers.elementwise_mul(emb_label_drop, gru)
red_sum = fluid.layers.reduce_sum(input=ele_mul, dim=1, keep_dim=True)
pre = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))
cost = fluid.layers.bpr_loss(input=pre, label=pos_label)
cost_sum = fluid.layers.reduce_sum(input=cost)
return src, pos_label, label, cost_sum
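To make the expand / sequence_reshape / elementwise_mul / reduce_sum chain above concrete: per timestep it reduces to a dot product between the GRU hidden state and each of the neg_size + 1 candidate embeddings. A minimal numpy sketch (illustrative only, not Paddle API; shapes assumed from the code above):

```python
import numpy as np

hid_size, neg_size = 100, 10
h = np.random.rand(hid_size)                       # one row of gru_h0_drop (one timestep)
cand_emb = np.random.rand(neg_size + 1, hid_size)  # embeddings of the pos + neg label ids

# expand + sequence_reshape tile the hidden state once per candidate,
# so elementwise_mul + reduce_sum is a per-candidate dot product:
h_tiled = np.tile(h, (neg_size + 1, 1))
pre = (h_tiled * cand_emb).sum(axis=1)             # shape (neg_size + 1,), like `pre` above
assert np.allclose(pre, cand_emb @ h)              # equivalent matmul form
```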
def train_cross_entropy_network(vocab_size, neg_size, hid_size, drop_out=0.2):
""" network definition """
emb_lr_x = 1.0
gru_lr_x = 1.0
fc_lr_x = 1.0
# Input data
src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
pos_label = fluid.layers.data(
name="pos_label", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(
name="label", shape=[neg_size + 1], dtype="int64", lod_level=1)
emb_src = fluid.layers.embedding(
input=src,
size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
name="emb",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=emb_lr_x))
emb_src_drop = fluid.layers.dropout(emb_src, dropout_prob=drop_out)
fc0 = fluid.layers.fc(input=emb_src_drop,
size=hid_size * 3,
param_attr=fluid.ParamAttr(
name="gru_fc",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=gru_lr_x),
bias_attr=False)
gru_h0 = fluid.layers.dynamic_gru(
input=fc0,
size=hid_size,
param_attr=fluid.ParamAttr(
name="dy_gru.param",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=gru_lr_x),
bias_attr="dy_gru.bias")
gru_h0_drop = fluid.layers.dropout(gru_h0, dropout_prob=drop_out)
label_re = fluid.layers.sequence_reshape(input=label, new_dim=1)
emb_label = fluid.layers.embedding(
input=label_re,
size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
name="emb",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=emb_lr_x))
emb_label_drop = fluid.layers.dropout(emb_label, dropout_prob=drop_out)
gru_exp = fluid.layers.expand(
x=gru_h0_drop, expand_times=[1, (neg_size + 1)])
gru = fluid.layers.sequence_reshape(input=gru_exp, new_dim=hid_size)
ele_mul = fluid.layers.elementwise_mul(emb_label_drop, gru)
red_sum = fluid.layers.reduce_sum(input=ele_mul, dim=1, keep_dim=True)
pre = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))
cost = fluid.layers.cross_entropy(input=pre, label=pos_label)
cost_sum = fluid.layers.reduce_sum(input=cost)
return src, pos_label, label, cost_sum
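Note that train_cross_entropy_network builds the same graph as train_bpr_network line for line; the two differ only in the loss, applying fluid.layers.cross_entropy instead of fluid.layers.bpr_loss to the same (neg_size + 1) candidate scores.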
def infer_bpr_network(vocab_size, batch_size, hid_size, dropout=0.2):
src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
emb_src = fluid.layers.embedding(
input=src, size=[vocab_size, hid_size], param_attr="emb")
emb_src_drop = fluid.layers.dropout(
emb_src, dropout_prob=dropout, is_test=True)
fc0 = fluid.layers.fc(input=emb_src_drop,
size=hid_size * 3,
param_attr="gru_fc",
bias_attr=False)
gru_h0 = fluid.layers.dynamic_gru(
input=fc0,
size=hid_size,
param_attr="dy_gru.param",
bias_attr="dy_gru.bias")
gru_h0_drop = fluid.layers.dropout(
gru_h0, dropout_prob=dropout, is_test=True)
all_label = fluid.layers.data(
name="all_label",
shape=[vocab_size, 1],
dtype="int64",
append_batch_size=False)
emb_all_label = fluid.layers.embedding(
input=all_label, size=[vocab_size, hid_size], param_attr="emb")
emb_all_label_drop = fluid.layers.dropout(
emb_all_label, dropout_prob=dropout, is_test=True)
all_pre = fluid.layers.matmul(
gru_h0_drop, emb_all_label_drop, transpose_y=True)
pos_label = fluid.layers.data(
name="pos_label", shape=[1], dtype="int64", lod_level=1)
acc = fluid.layers.accuracy(input=all_pre, label=pos_label, k=20)
return acc
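At inference time the graph scores the hidden state against the embedding of every vocabulary item (reusing the shared "emb" table via matmul) and evaluates recall@20 through accuracy(..., k=20). A rough numpy sketch of that step, with illustrative shapes and an assumed ground-truth item id:

```python
import numpy as np

hid_size, vocab_size = 100, 5000
h = np.random.rand(hid_size)                     # GRU output for one step (gru_h0_drop row)
item_emb = np.random.rand(vocab_size, hid_size)  # the shared "emb" lookup table

scores = item_emb @ h                            # all_pre: one score per vocabulary item
top20 = np.argsort(-scores)[:20]                 # 20 highest-scoring candidates
pos_item = 42                                    # hypothetical ground-truth next item id
hit = pos_item in top20                          # what accuracy(..., k=20) averages over steps
```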
......
The block below is the old net_bpr.py, deleted by this commit; its train_network and infer_network are folded into net.py as train_bpr_network and infer_bpr_network above.
import paddle.fluid as fluid
def train_network(vocab_size, neg_size, hid_size, drop_out=0.2):
""" network definition """
emb_lr_x = 1.0
gru_lr_x = 1.0
fc_lr_x = 1.0
# Input data
src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
pos_label = fluid.layers.data(
name="pos_label", shape=[1], dtype="int64", lod_level=1)
label = fluid.layers.data(
name="label", shape=[neg_size + 1], dtype="int64", lod_level=1)
emb_src = fluid.layers.embedding(
input=src,
size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
name="emb",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=emb_lr_x))
emb_src_drop = fluid.layers.dropout(emb_src, dropout_prob=drop_out)
fc0 = fluid.layers.fc(input=emb_src_drop,
size=hid_size * 3,
param_attr=fluid.ParamAttr(
name="gru_fc",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=gru_lr_x),
bias_attr=False)
gru_h0 = fluid.layers.dynamic_gru(
input=fc0,
size=hid_size,
param_attr=fluid.ParamAttr(
name="dy_gru.param",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=gru_lr_x),
bias_attr="dy_gru.bias")
gru_h0_drop = fluid.layers.dropout(gru_h0, dropout_prob=drop_out)
label_re = fluid.layers.sequence_reshape(input=label, new_dim=1)
emb_label = fluid.layers.embedding(
input=label_re,
size=[vocab_size, hid_size],
param_attr=fluid.ParamAttr(
name="emb",
initializer=fluid.initializer.XavierInitializer(),
learning_rate=emb_lr_x))
emb_label_drop = fluid.layers.dropout(emb_label, dropout_prob=drop_out)
gru_exp = fluid.layers.expand(
x=gru_h0_drop, expand_times=[1, (neg_size + 1)])
gru = fluid.layers.sequence_reshape(input=gru_exp, new_dim=hid_size)
ele_mul = fluid.layers.elementwise_mul(emb_label_drop, gru)
red_sum = fluid.layers.reduce_sum(input=ele_mul, dim=1, keep_dim=True)
pre = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))
cost = fluid.layers.bpr_loss(input=pre, label=pos_label)
cost_sum = fluid.layers.reduce_sum(input=cost)
return src, pos_label, label, cost_sum
def infer_network(vocab_size, batch_size, hid_size, dropout=0.2):
src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
emb_src = fluid.layers.embedding(
input=src, size=[vocab_size, hid_size], param_attr="emb")
emb_src_drop = fluid.layers.dropout(
emb_src, dropout_prob=dropout, is_test=True)
fc0 = fluid.layers.fc(input=emb_src_drop,
size=hid_size * 3,
param_attr="gru_fc",
bias_attr=False)
gru_h0 = fluid.layers.dynamic_gru(
input=fc0,
size=hid_size,
param_attr="dy_gru.param",
bias_attr="dy_gru.bias")
gru_h0_drop = fluid.layers.dropout(
gru_h0, dropout_prob=dropout, is_test=True)
all_label = fluid.layers.data(
name="all_label",
shape=[vocab_size, 1],
dtype="int64",
append_batch_size=False)
emb_all_label = fluid.layers.embedding(
input=all_label, size=[vocab_size, hid_size], param_attr="emb")
emb_all_label_drop = fluid.layers.dropout(
emb_all_label, dropout_prob=dropout, is_test=True)
all_pre = fluid.layers.matmul(
gru_h0_drop, emb_all_label_drop, transpose_y=True)
pos_label = fluid.layers.data(
name="pos_label", shape=[1], dtype="int64", lod_level=1)
acc = fluid.layers.accuracy(input=all_pre, label=pos_label, k=20)
return acc
@@ -63,7 +63,7 @@ def train():
        buffer_size=1000, word_freq_threshold=0, is_train=True)
    # Train program
-    src_wordseq, dst_wordseq, avg_cost, acc = net.network(
+    src_wordseq, dst_wordseq, avg_cost, acc = net.all_vocab_network(
        vocab_size=vocab_size, hid_size=hid_size)
    # Optimization to minimize loss
@@ -117,7 +117,6 @@ def train():
    fetch_vars = [avg_cost, acc]
    fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
    print("model saved in %s" % save_dir)
-    #exe.close()
    print("finish training")
......
@@ -9,7 +9,7 @@ import paddle.fluid as fluid
import paddle
import time
import utils
-import net_bpr as net
+import net
SEED = 102
@@ -26,6 +26,7 @@ def parse_args():
        '--hid_size', type=int, default=100, help='hidden-dim size')
    parser.add_argument(
        '--neg_size', type=int, default=10, help='neg item size')
+    parser.add_argument('--loss', type=str, default="bpr", help='loss function')
    parser.add_argument(
        '--model_dir', type=str, default='model_bpr_recall20', help='model dir')
    parser.add_argument(
@@ -65,8 +66,12 @@ def train():
        buffer_size=1000, word_freq_threshold=0, is_train=True)
    # Train program
-    src, pos_label, label, avg_cost = net.train_network(
-        neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
+    if args.loss == 'bpr':
+        src, pos_label, label, avg_cost = net.train_bpr_network(
+            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
+    else:
+        src, pos_label, label, avg_cost = net.train_cross_entropy_network(
+            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
    # Optimization to minimize loss
    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
......
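Given the branch above, any --loss value other than bpr selects the sampled cross-entropy network. For example (the value ce here is illustrative; the script only checks whether the value equals 'bpr'):

```
CUDA_VISIBLE_DEVICES=0 python train_sample_neg.py --loss ce --use_cuda 1
```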