Unverified commit 921a0ef3, authored by zhang wenhui, committed by GitHub

Merge pull request #1707 from frankwhzhang/fix_bug

fix bug
@@ -79,7 +79,7 @@ SessionId ItemId Time
 2 214757407 1396850438.247
 ```
-The data format needs to be converted. Run the script
+The data format needs to be converted; run the following script:
 ```
 python convert_format.py
 ```
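For orientation, the conversion step above turns the raw `SessionId ItemId Time` rows into one item sequence per session. Below is a minimal sketch of that idea; the grouping logic, output layout, and file names are assumptions for illustration, and `convert_format.py` in the repository remains the authoritative version.

```
from collections import defaultdict

def convert(in_path, out_path):
    """Group raw `SessionId ItemId Time` rows into one item sequence per
    session, ordered by timestamp (a sketch; the real convert_format.py
    may use a different output layout)."""
    sessions = defaultdict(list)
    with open(in_path) as f:
        next(f)  # skip the "SessionId ItemId Time" header line
        for line in f:
            session_id, item_id, ts = line.split()
            sessions[session_id].append((float(ts), item_id))
    with open(out_path, 'w') as f:
        for events in sessions.values():
            items = [item for _, item in sorted(events)]
            if len(items) > 1:  # a single click yields no next-item target
                f.write(' '.join(items) + '\n')

# convert('rsc15_train.txt', 'train_raw.txt')  # hypothetical file names
```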
@@ -101,7 +101,7 @@ python convert_format.py
 Generate the dictionary and the corresponding Paddle input files from the training and test files.
-Note that the training files need to be placed in one directory and the test files in another; multiple training files are supported.
+Place the training files under the raw_train_data directory and the test files under the raw_test_data directory; this generates the corresponding train_data, test_data and vocab.txt files.
 ```
 python text2paddle.py raw_train_data/ raw_test_data/ train_data test_data vocab.txt
 ```
......
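The `text2paddle.py` step referenced above builds a shared vocabulary over all training and test files and re-encodes every item as an integer id. A rough sketch of that vocabulary pass, assuming whitespace-separated item sequences; the script's exact id assignment and output format may differ.

```
import os

def build_vocab(train_dir, test_dir, vocab_path):
    """Assign an integer id to every item seen in the raw data (sketch)."""
    vocab = {}
    for d in (train_dir, test_dir):
        for name in os.listdir(d):
            with open(os.path.join(d, name)) as f:
                for line in f:
                    for item in line.split():
                        vocab.setdefault(item, len(vocab))
    with open(vocab_path, 'w') as f:
        for item, idx in sorted(vocab.items(), key=lambda kv: kv[1]):
            f.write('%s %d\n' % (item, idx))
    return vocab

# vocab = build_vocab('raw_train_data/', 'raw_test_data/', 'vocab.txt')
```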
@@ -171,7 +171,8 @@ def train_cross_entropy_network(vocab_size, neg_size, hid_size, drop_out=0.2):
     ele_mul = fluid.layers.elementwise_mul(emb_label_drop, gru)
     red_sum = fluid.layers.reduce_sum(input=ele_mul, dim=1, keep_dim=True)
-    pre = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))
+    pre_ = fluid.layers.sequence_reshape(input=red_sum, new_dim=(neg_size + 1))
+    pre = fluid.layers.softmax(input=pre_)
     cost = fluid.layers.cross_entropy(input=pre, label=pos_label)
     cost_sum = fluid.layers.reduce_sum(input=cost)
......
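The change above keeps the raw per-candidate scores in `pre_` (reshaped to `neg_size + 1` columns) and applies a softmax before `cross_entropy`, since `fluid.layers.cross_entropy` expects a probability distribution rather than raw logits. A NumPy sketch of the resulting per-step objective, assuming the positive item sits in column 0 of each row, consistent with how `to_lodtensor_bpr` stacks `pos_data` ahead of `neg_data`:

```
import numpy as np

def sampled_softmax_xent(scores):
    """scores: (T, neg_size + 1) similarity scores per time step; column 0 is
    assumed to be the positive item.  Mirrors pre_ -> softmax -> cross_entropy."""
    shifted = scores - scores.max(axis=1, keepdims=True)  # numerical stability
    prob = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    return float(-np.log(prob[:, 0]).sum())  # reduce_sum over the steps

print(sampled_softmax_xent(np.array([[2.0, 0.1, -1.0], [1.5, 1.4, 0.3]])))
```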
@@ -68,9 +68,11 @@ def train():
     # Train program
     if args.loss == 'bpr':
+        print('bpr loss')
         src, pos_label, label, avg_cost = net.train_bpr_network(
             neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
     else:
+        print('cross-entropy loss')
         src, pos_label, label, avg_cost = net.train_cross_entropy_network(
             neg_size=args.neg_size, vocab_size=vocab_size, hid_size=hid_size)
......
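The `--loss` flag above selects between the BPR branch and the cross-entropy branch; the newly added prints simply make the choice visible in the training log. For reference, here is a standalone NumPy sketch of the standard BPR objective that the `bpr` branch optimizes, i.e. `-log(sigmoid(score_pos - score_neg))` averaged over the sampled negatives. This is the generic formulation, not the repository's exact graph.

```
import numpy as np

def bpr_loss(pos_scores, neg_scores):
    """pos_scores: (T,) score of the observed next item.
    neg_scores: (T, neg_size) scores of the sampled negatives.
    Standard BPR: push every positive score above every paired negative."""
    diff = pos_scores[:, None] - neg_scores
    # -log(sigmoid(diff)) written as logaddexp(0, -diff) for numerical stability
    return float(np.logaddexp(0.0, -diff).mean())

print(bpr_loss(np.array([2.0, 1.5]), np.array([[0.1, -1.0], [1.4, 0.3]])))
```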
@@ -45,8 +45,8 @@ def to_lodtensor_bpr(raw_data, neg_size, vocab_size, place):
     neg_data = np.tile(pos_data, neg_size)
     np.random.shuffle(neg_data)
     for ii in range(length * neg_size):
-        if neg_data[ii] == pos_data[ii / neg_size]:
-            neg_data[ii] = pos_data[length - 1 - ii / neg_size]
+        if neg_data[ii] == pos_data[ii // neg_size]:
+            neg_data[ii] = pos_data[length - 1 - ii // neg_size]
     label_data = np.column_stack(
         (pos_data.reshape(length, 1), neg_data.reshape(length, neg_size)))
......
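The `//` change above matters under Python 3, where `/` on two integers returns a float and NumPy then rejects it as an array index; `//` keeps the positive/negative alignment an integer. The same sampling loop as a standalone sketch, showing how each positive gets `neg_size` shuffled negatives and how a collision with its own positive is swapped for a distant one:

```
import numpy as np

def sample_negatives(pos_data, neg_size):
    """Mirror of the fixed loop: tile and shuffle the positives to get
    candidate negatives, then replace collisions with a far-away positive."""
    length = len(pos_data)
    neg_data = np.tile(pos_data, neg_size)
    np.random.shuffle(neg_data)
    for ii in range(length * neg_size):
        # // keeps the index an int under Python 3; / would yield a float
        if neg_data[ii] == pos_data[ii // neg_size]:
            neg_data[ii] = pos_data[length - 1 - ii // neg_size]
    return np.column_stack(
        (pos_data.reshape(length, 1), neg_data.reshape(length, neg_size)))

print(sample_negatives(np.array([3, 7, 7, 9]), neg_size=2))
```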
@@ -81,7 +81,7 @@ def infer(args, vocab_size, test_reader):
     start_up_program = fluid.Program()
     with fluid.program_guard(main_program, start_up_program):
         acc = model(vocab_size, emb_size, hid_size)
-    for epoch in xrange(start_index, last_index + 1):
+    for epoch in range(start_index, last_index + 1):
         copy_program = main_program.clone()
         model_path = model_dir + "/epoch_" + str(epoch)
         fluid.io.load_params(
......