Commit c34bd511 authored by zhangwenhui03

fix style

Parent 8f43b4fa
@@ -5,12 +5,11 @@
```text
.
 ├── README.md            # documentation
-├── train.py             # training script, cross-entropy loss
-├── train_bpr.py         # training script, bpr loss
-├── infer.py             # inference script, cross-entropy loss
-├── infer_bpr.py         # inference script, bpr loss
-├── net.py               # network definition, cross-entropy loss
-├── net_bpr.py           # network definition, bpr loss
+├── train.py             # training script, full-vocabulary cross-entropy
+├── train_sample_neg.py  # training script, sampled negatives, bpr and cross-entropy losses
+├── infer.py             # inference script, full vocabulary
+├── infer_sample_neg.py  # inference script, sampled negatives
+├── net.py               # network definitions
 ├── text2paddle.py       # convert raw text data to paddle format
 ├── cluster_train.py     # distributed training
 ├── cluster_train.sh     # distributed training launcher script
@@ -33,6 +32,9 @@ The GRU4REC model is described in the paper [Session-based Recommendations with Recu
Session-based recommendation applies to a wide range of scenarios with sequential user behavior, such as product browsing, news clicks, and location check-ins.
Three loss functions are supported: full-vocabulary cross-entropy, Bayesian Pairwise Ranking (BPR) over sampled negatives, and cross-entropy over sampled negatives; the sketch below illustrates how they differ.
To run the sample program you can skip the 'RSC15 data download and preprocessing' section.
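As a rough illustration of the three objectives, here is a minimal numpy sketch with made-up scores (it mirrors the math, not the Paddle implementation; `scores` and the positive-at-index-0 convention are assumptions for the example):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

# Toy scores for (neg_size + 1) candidate items; index 0 is the positive item.
scores = np.array([2.0, 0.5, -1.0, 0.3])

# Sampled cross-entropy: softmax over the positive plus sampled negatives.
probs = np.exp(scores) / np.exp(scores).sum()
ce_loss = -np.log(probs[0])

# BPR: the positive item should outscore every sampled negative.
bpr_loss = -np.mean(np.log(sigmoid(scores[0] - scores[1:])))

# Full-vocabulary cross-entropy is the same as the sampled version, except
# the scores run over the whole vocabulary instead of a small sample.
print(ce_loss, bpr_loss)
```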
## RSC15 data download and preprocessing
@@ -129,7 +131,10 @@ CPU environment
python train.py --train_dir train_data/
```
-Training with bayesian pairwise ranking loss (bpr loss) follows the same format as cross-entropy.
+Training with bayesian pairwise ranking loss (bpr loss):
```
CUDA_VISIBLE_DEVICES=0 python train_sample_neg.py --loss bpr --use_cuda 1
```
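The sampled cross-entropy variant is selected through the same script's --loss flag. Judging from the branch in train_sample_neg.py later in this diff, any value other than `bpr` falls through to the cross-entropy network, so a command along these lines should work (the flag value `ce` is only an assumed example):

```
CUDA_VISIBLE_DEVICES=0 python train_sample_neg.py --loss ce --use_cuda 1
```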
Note that when running a single-machine multi-card job in a CPU environment (--parallel 1), batch_size should be larger than the number of CPU cores.
......
@@ -8,7 +8,7 @@ import numpy as np
import six
import paddle.fluid as fluid
import paddle
-import net_bpr as net
+import net
import utils
......
import paddle.fluid as fluid
-def network(vocab_size,
-            hid_size=100,
-            init_low_bound=-0.04,
-            init_high_bound=0.04):
+def all_vocab_network(vocab_size,
+                      hid_size=100,
+                      init_low_bound=-0.04,
+                      init_high_bound=0.04):
     """ network definition """
     emb_lr_x = 10.0
     gru_lr_x = 1.0
@@ -43,8 +44,173 @@ def network(vocab_size,
            initializer=fluid.initializer.Uniform(
                low=init_low_bound, high=init_high_bound),
            learning_rate=fc_lr_x))

    cost = fluid.layers.cross_entropy(input=fc, label=dst_wordseq)
    acc = fluid.layers.accuracy(input=fc, label=dst_wordseq, k=20)
    avg_cost = fluid.layers.mean(x=cost)
    return src_wordseq, dst_wordseq, avg_cost, acc
def train_bpr_network(vocab_size, neg_size, hid_size, drop_out=0.2):
    """ network definition """
    emb_lr_x = 1.0
    gru_lr_x = 1.0
    fc_lr_x = 1.0
    # Input data: the source sequence, the position of the positive item among
    # the candidates, and the (neg_size + 1) candidate item ids per step
    src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
    pos_label = fluid.layers.data(
        name="pos_label", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(
        name="label", shape=[neg_size + 1], dtype="int64", lod_level=1)
    emb_src = fluid.layers.embedding(
        input=src,
        size=[vocab_size, hid_size],
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=emb_lr_x))
    emb_src_drop = fluid.layers.dropout(emb_src, dropout_prob=drop_out)
    fc0 = fluid.layers.fc(input=emb_src_drop,
                          size=hid_size * 3,
                          param_attr=fluid.ParamAttr(
                              name="gru_fc",
                              initializer=fluid.initializer.XavierInitializer(),
                              learning_rate=gru_lr_x),
                          bias_attr=False)
    gru_h0 = fluid.layers.dynamic_gru(
        input=fc0,
        size=hid_size,
        param_attr=fluid.ParamAttr(
            name="dy_gru.param",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=gru_lr_x),
        bias_attr="dy_gru.bias")
    gru_h0_drop = fluid.layers.dropout(gru_h0, dropout_prob=drop_out)
    # Embed the candidate ids with the same "emb" table used for the input
    label_re = fluid.layers.sequence_reshape(input=label, new_dim=1)
    emb_label = fluid.layers.embedding(
        input=label_re,
        size=[vocab_size, hid_size],
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=emb_lr_x))
    emb_label_drop = fluid.layers.dropout(emb_label, dropout_prob=drop_out)
    # Tile each hidden state (neg_size + 1) times so that elementwise_mul plus
    # reduce_sum yields one dot-product score per candidate
    gru_exp = fluid.layers.expand(
        x=gru_h0_drop, expand_times=[1, (neg_size + 1)])
    gru = fluid.layers.sequence_reshape(input=gru_exp, new_dim=hid_size)
    ele_mul = fluid.layers.elementwise_mul(emb_label_drop, gru)
    red_sum = fluid.layers.reduce_sum(input=ele_mul, dim=1, keep_dim=True)
    pre = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))
    cost = fluid.layers.bpr_loss(input=pre, label=pos_label)
    cost_sum = fluid.layers.reduce_sum(input=cost)
    return src, pos_label, label, cost_sum
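The expand/sequence_reshape dance above computes, for every time step, a dot product between the GRU hidden state and each of the (neg_size + 1) candidate embeddings. A minimal numpy sketch of the same computation (the shapes and variable names here are assumptions for illustration, not taken from the repo):

```python
import numpy as np

T, hid_size, neg_size = 5, 100, 10
gru_h = np.random.rand(T, hid_size)               # gru_h0_drop: one state per step
cand = np.random.rand(T, neg_size + 1, hid_size)  # candidate item embeddings

# expand + sequence_reshape tile each hidden state (neg_size + 1) times,
# so elementwise_mul + reduce_sum becomes a per-candidate dot product:
tiled = np.repeat(gru_h, neg_size + 1, axis=0)          # (T*(neg_size+1), hid_size)
pre = (cand.reshape(-1, hid_size) * tiled).sum(axis=1)  # flat candidate scores
pre = pre.reshape(T, neg_size + 1)                      # one score per candidate
print(pre.shape)  # (5, 11)
```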
def train_cross_entropy_network(vocab_size, neg_size, hid_size, drop_out=0.2):
    """ network definition (identical to train_bpr_network except for the loss) """
    emb_lr_x = 1.0
    gru_lr_x = 1.0
    fc_lr_x = 1.0
    # Input data
    src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
    pos_label = fluid.layers.data(
        name="pos_label", shape=[1], dtype="int64", lod_level=1)
    label = fluid.layers.data(
        name="label", shape=[neg_size + 1], dtype="int64", lod_level=1)
    emb_src = fluid.layers.embedding(
        input=src,
        size=[vocab_size, hid_size],
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=emb_lr_x))
    emb_src_drop = fluid.layers.dropout(emb_src, dropout_prob=drop_out)
    fc0 = fluid.layers.fc(input=emb_src_drop,
                          size=hid_size * 3,
                          param_attr=fluid.ParamAttr(
                              name="gru_fc",
                              initializer=fluid.initializer.XavierInitializer(),
                              learning_rate=gru_lr_x),
                          bias_attr=False)
    gru_h0 = fluid.layers.dynamic_gru(
        input=fc0,
        size=hid_size,
        param_attr=fluid.ParamAttr(
            name="dy_gru.param",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=gru_lr_x),
        bias_attr="dy_gru.bias")
    gru_h0_drop = fluid.layers.dropout(gru_h0, dropout_prob=drop_out)
    label_re = fluid.layers.sequence_reshape(input=label, new_dim=1)
    emb_label = fluid.layers.embedding(
        input=label_re,
        size=[vocab_size, hid_size],
        param_attr=fluid.ParamAttr(
            name="emb",
            initializer=fluid.initializer.XavierInitializer(),
            learning_rate=emb_lr_x))
    emb_label_drop = fluid.layers.dropout(emb_label, dropout_prob=drop_out)
    gru_exp = fluid.layers.expand(
        x=gru_h0_drop, expand_times=[1, (neg_size + 1)])
    gru = fluid.layers.sequence_reshape(input=gru_exp, new_dim=hid_size)
    ele_mul = fluid.layers.elementwise_mul(emb_label_drop, gru)
    red_sum = fluid.layers.reduce_sum(input=ele_mul, dim=1, keep_dim=True)
    pre_ = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))
    # cross_entropy expects normalized probabilities, so normalize the raw
    # candidate scores with a softmax first
    pre = fluid.layers.softmax(input=pre_)
    cost = fluid.layers.cross_entropy(input=pre, label=pos_label)
    cost_sum = fluid.layers.reduce_sum(input=cost)
    return src, pos_label, label, cost_sum
def infer_bpr_network(vocab_size, batch_size, hid_size, dropout=0.2):
    src = fluid.layers.data(name="src", shape=[1], dtype="int64", lod_level=1)
    emb_src = fluid.layers.embedding(
        input=src, size=[vocab_size, hid_size], param_attr="emb")
    emb_src_drop = fluid.layers.dropout(
        emb_src, dropout_prob=dropout, is_test=True)
    fc0 = fluid.layers.fc(input=emb_src_drop,
                          size=hid_size * 3,
                          param_attr="gru_fc",
                          bias_attr=False)
    gru_h0 = fluid.layers.dynamic_gru(
        input=fc0,
        size=hid_size,
        param_attr="dy_gru.param",
        bias_attr="dy_gru.bias")
    gru_h0_drop = fluid.layers.dropout(
        gru_h0, dropout_prob=dropout, is_test=True)
    # At inference time, score every item in the vocabulary
    all_label = fluid.layers.data(
        name="all_label",
        shape=[vocab_size, 1],
        dtype="int64",
        append_batch_size=False)
    emb_all_label = fluid.layers.embedding(
        input=all_label, size=[vocab_size, hid_size], param_attr="emb")
    emb_all_label_drop = fluid.layers.dropout(
        emb_all_label, dropout_prob=dropout, is_test=True)
    all_pre = fluid.layers.matmul(
        gru_h0_drop, emb_all_label_drop, transpose_y=True)
    pos_label = fluid.layers.data(
        name="pos_label", shape=[1], dtype="int64", lod_level=1)
    # Recall@20: the true next item should rank within the top 20 scores
    acc = fluid.layers.accuracy(input=all_pre, label=pos_label, k=20)
    return acc
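Since all_pre scores every vocabulary item, the feed for all_label is simply the full id range. A hedged sketch of how a caller might build it (the variable names and vocab_size value are assumptions, not taken from infer_sample_neg.py):

```python
import numpy as np

vocab_size = 37483  # hypothetical vocabulary size
all_label = np.arange(vocab_size, dtype="int64").reshape(vocab_size, 1)
# feed dict passed to the executor, alongside "src" and "pos_label":
# {"src": ..., "all_label": all_label, "pos_label": ...}
```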
(Deleted file, net_bpr.py judging from the README tree and import changes: its train_network and infer_network bodies are verbatim copies of the train_bpr_network and infer_bpr_network merged into net.py above.)
@@ -63,7 +63,7 @@ def train():
buffer_size=1000, word_freq_threshold=0, is_train=True)
     # Train program
-    src_wordseq, dst_wordseq, avg_cost, acc = net.network(
+    src_wordseq, dst_wordseq, avg_cost, acc = net.all_vocab_network(
         vocab_size=vocab_size, hid_size=hid_size)
    # Optimization to minimize loss
@@ -117,7 +117,6 @@ def train():
         fetch_vars = [avg_cost, acc]
         fluid.io.save_inference_model(save_dir, feed_var_names, fetch_vars, exe)
         print("model saved in %s" % save_dir)
-    #exe.close()
     print("finish training")
......
@@ -9,7 +9,7 @@ import paddle.fluid as fluid
import paddle
import time
import utils
-import net_bpr as net
+import net
SEED = 102
@@ -26,6 +26,7 @@ def parse_args():
         '--hid_size', type=int, default=100, help='hidden-dim size')
     parser.add_argument(
         '--neg_size', type=int, default=10, help='neg item size')
+    parser.add_argument('--loss', type=str, default="bpr", help='loss function')
     parser.add_argument(
         '--model_dir', type=str, default='model_bpr_recall20', help='model dir')
     parser.add_argument(
@@ -65,8 +66,12 @@ def train():
         buffer_size=1000, word_freq_threshold=0, is_train=True)
     # Train program
-    src, pos_label, label, avg_cost = net.train_network(
-        neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
+    if args.loss == 'bpr':
+        src, pos_label, label, avg_cost = net.train_bpr_network(
+            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
+    else:
+        src, pos_label, label, avg_cost = net.train_cross_entropy_network(
+            neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
    # Optimization to minimize loss
    sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=args.base_lr)
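The hunk is truncated at this point. In a typical fluid training program the optimizer is then applied to the loss along these lines (a sketch of the standard API, not the exact continuation of this file):

```python
sgd_optimizer.minimize(avg_cost)
```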
......