Unverified commit bc249915, authored by zhang wenhui, committed by GitHub

update api 1.8 (#4615)

Parent a03fc02a
......@@ -76,11 +76,10 @@ class DCN(object):
def backward(self, lr):
p_g_clip = fluid.backward.append_backward(loss=self.loss)
fluid.clip.set_gradient_clip(
fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_by_norm))
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_by_norm)
p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
optimizer = fluid.optimizer.Adam(learning_rate=lr)
optimizer = fluid.optimizer.Adam(learning_rate=lr, grad_clip=clip)
# params_grads = optimizer.backward(self.loss)
optimizer.apply_gradients(p_g_clip)
......
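This hunk drops the deprecated fluid.clip.set_gradient_clip call in favor of the 1.8-style grad_clip argument on the optimizer. A minimal, self-contained sketch of the new pattern, using an illustrative toy network rather than the DCN model from the repo:

    import paddle.fluid as fluid

    # Illustrative toy regression network, not the DCN model above.
    x = fluid.data(name="x", shape=[None, 13], dtype="float32")
    y = fluid.data(name="y", shape=[None, 1], dtype="float32")
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    # Paddle 1.8 style: build the clip object once and hand it to the optimizer.
    # No separate set_gradient_clip / append_gradient_clip_ops calls are needed.
    clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
    optimizer = fluid.optimizer.Adam(learning_rate=0.001, grad_clip=clip)
    optimizer.minimize(loss)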
......@@ -86,7 +86,6 @@ def train():
logger.info("reading data completes")
avg_cost, pred = network.network(item_count, cat_count, 433)
#fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
base_lr = args.base_lr
boundaries = [410000]
values = [base_lr, 0.2]
......@@ -101,12 +100,13 @@ def train():
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
feeder = fluid.DataFeeder(
feed_list=[
"hist_item_seq", "hist_cat_seq", "target_item", "target_cat",
"label", "mask", "target_item_seq", "target_cat_seq"
],
place=place)
feed_list = [
"hist_item_seq", "hist_cat_seq", "target_item", "target_cat",
"label", "mask", "target_item_seq", "target_cat_seq"
]
loader = fluid.io.DataLoader.from_generator(
feed_list=feed_list, capacity=10000, iterable=True)
loader.set_sample_list_generator(data_reader, places=place)
if use_parallel:
train_exe = fluid.ParallelExecutor(
use_cuda=use_cuda,
......@@ -122,10 +122,10 @@ def train():
loss_sum = 0.0
for id in range(epoch_num):
epoch = id + 1
for data in data_reader():
for data in loader():
global_step += 1
results = train_exe.run(main_program,
feed=feeder.feed(data),
feed=data,
fetch_list=[avg_cost.name, pred.name],
return_numpy=True)
loss_sum += results[0].mean()
......
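Here fluid.DataFeeder plus feeder.feed(data) gives way to fluid.io.DataLoader.from_generator, whose iterator already yields ready-to-use feed data. A minimal sketch of the 1.8 loader pattern, assuming a single feed variable and a toy sample-list reader (all names below are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    x = fluid.data(name="x", shape=[None, 1], dtype="float32")
    loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))

    # Build the loader from the feed variables, then bind a sample-list
    # generator and a place; no DataFeeder is involved.
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[x], capacity=64, iterable=True)

    def sample_list_reader():
        for _ in range(8):
            # one batch per yield: a list of samples, each a tuple of fields
            yield [(np.random.random([1]).astype("float32"),)]

    place = fluid.CPUPlace()
    loader.set_sample_list_generator(sample_list_reader, places=place)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    for data in loader():  # `data` can be passed to feed= directly
        exe.run(fluid.default_main_program(), feed=data, fetch_list=[loss.name])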
......@@ -92,14 +92,15 @@ def train():
logger.info("reading data completes")
avg_cost, pred, feed_list = network.network(item_count, cat_count)
fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
clip_norm=5.0))
clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
base_lr = args.base_lr
boundaries = [410000]
values = [base_lr, 0.2]
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.piecewise_decay(
boundaries=boundaries, values=values))
boundaries=boundaries, values=values),
grad_clip=clip)
sgd_optimizer.minimize(avg_cost)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
......
......@@ -190,9 +190,8 @@ class MultiviewSimnet(object):
# pairwise hinge_loss
loss_part1 = fluid.layers.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=cos_pos,
shape=[-1, 1],
fluid.layers.fill_constant(
shape=[fluid.layers.shape(cos_pos)[0], 1],
value=self.margin,
dtype='float32'),
cos_pos)
......@@ -200,8 +199,10 @@ class MultiviewSimnet(object):
loss_part2 = fluid.layers.elementwise_add(loss_part1, cos_neg)
loss_part3 = fluid.layers.elementwise_max(
tensor.fill_constant_batch_size_like(
input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
fluid.layers.fill_constant(
shape=[fluid.layers.shape(loss_part2)[0], 1],
value=0.0,
dtype='float32'),
loss_part2)
avg_cost = fluid.layers.mean(loss_part3)
......
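The hunks above (and the similar ones that follow) replace fill_constant_batch_size_like with plain fluid.layers.fill_constant, taking the batch dimension from the runtime shape of an existing tensor. A minimal sketch of the pattern, with an illustrative cos_pos placeholder and margin value:

    import paddle.fluid as fluid

    # Illustrative tensor whose leading (batch) dimension is unknown at build time.
    cos_pos = fluid.data(name="cos_pos", shape=[None, 1], dtype="float32")

    # Pre-1.8: fill_constant_batch_size_like(input=cos_pos, shape=[-1, 1], ...)
    # 1.8: read the batch size from the runtime shape of cos_pos instead.
    margin_tensor = fluid.layers.fill_constant(
        shape=[fluid.layers.shape(cos_pos)[0], 1], value=0.1, dtype='float32')

    # Typical use in the hinge loss above: max(0, margin - cos_pos + cos_neg),
    # shown here without the cos_neg term for brevity.
    loss = fluid.layers.elementwise_max(
        fluid.layers.fill_constant(
            shape=[fluid.layers.shape(cos_pos)[0], 1], value=0.0, dtype='float32'),
        fluid.layers.elementwise_sub(margin_tensor, cos_pos))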
......@@ -91,9 +91,11 @@ def get_cards(args):
def train_loop(main_program, avg_cost, acc, train_input_data, place, args,
train_reader):
data_list = [var.name for var in train_input_data]
feeder = fluid.DataFeeder(feed_list=data_list, place=place)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
loader = fluid.io.DataLoader.from_generator(
feed_list=train_input_data, capacity=10000, iterable=True)
loader.set_sample_list_generator(train_reader, places=place)
train_exe = exe
total_time = 0.0
......@@ -103,10 +105,10 @@ def train_loop(main_program, avg_cost, acc, train_input_data, place, args,
print("epoch_%d start" % epoch_idx)
t0 = time.time()
i = 0
for batch_id, data in enumerate(train_reader()):
for batch_id, data in enumerate(loader()):
i += 1
loss_val, correct_val = train_exe.run(
feed=feeder.feed(data), fetch_list=[avg_cost.name, acc.name])
feed=data, fetch_list=[avg_cost.name, acc.name])
ce_info.append(float(np.mean(correct_val)) / args.batch_size)
if i % args.print_batch == 0:
logger.info(
......
......@@ -57,13 +57,17 @@ class PairwiseHingeLoss(object):
def forward(self, pos, neg):
loss_part1 = fluid.layers.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=pos, shape=[-1, 1], value=self.margin, dtype='float32'),
fluid.layers.fill_constant(
shape=[fluid.layers.shape(pos)[0], 1],
value=self.margin,
dtype='float32'),
pos)
loss_part2 = fluid.layers.elementwise_add(loss_part1, neg)
loss_part3 = fluid.layers.elementwise_max(
tensor.fill_constant_batch_size_like(
input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
fluid.layers.fill_constant(
shape=[fluid.layers.shape(loss_part2)[0], 1],
value=0.0,
dtype='float32'),
loss_part2)
return loss_part3
......
......@@ -46,13 +46,17 @@ def network(vocab_text_size,
cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
#calculate hinge loss
loss_part1 = nn.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=cos_pos, shape=[-1, 1], value=margin, dtype='float32'),
fluid.layers.fill_constant(
shape=[fluid.layers.shape(cos_pos)[0], 1],
value=margin,
dtype='float32'),
cos_pos)
loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
loss_part3 = nn.elementwise_max(
tensor.fill_constant_batch_size_like(
input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
fluid.layers.fill_constant(
shape=[fluid.layers.shape(loss_part2)[0], 1],
value=0.0,
dtype='float32'),
loss_part2)
avg_cost = nn.mean(loss_part3)
less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
......
......@@ -20,7 +20,10 @@ import numpy as np
import paddle.fluid as fluid
def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False, neg_num=5):
def skip_gram_word2vec_shuffle_batch(dict_size,
embedding_size,
is_sparse=False,
neg_num=5):
words = []
input_word = fluid.data(name="input_word", shape=[None, 1], dtype='int64')
......@@ -61,7 +64,8 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
# add shuffle_batch after embedding.
neg_emb_w_list = []
for i in range(neg_num):
neg_emb_w_list.append(fluid.contrib.layers.shuffle_batch(true_emb_w)) # shuffle true_word
neg_emb_w_list.append(
fluid.contrib.layers.shuffle_batch(true_emb_w)) # shuffle true_word
neg_emb_w = fluid.layers.concat(neg_emb_w_list, axis=0)
neg_emb_w_re = fluid.layers.reshape(
......@@ -69,7 +73,8 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
neg_emb_b_list = []
for i in range(neg_num):
neg_emb_b_list.append(fluid.contrib.layers.shuffle_batch(true_emb_b)) # shuffle true_word
neg_emb_b_list.append(
fluid.contrib.layers.shuffle_batch(true_emb_b)) # shuffle true_word
neg_emb_b = fluid.layers.concat(neg_emb_b_list, axis=0)
neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])
......@@ -81,15 +86,20 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
true_emb_b)
input_emb_re = fluid.layers.reshape(
input_emb, shape=[-1, 1, embedding_size])
neg_matmul = fluid.layers.matmul(input_emb_re, neg_emb_w_re, transpose_y=True)
neg_matmul = fluid.layers.matmul(
input_emb_re, neg_emb_w_re, transpose_y=True)
neg_matmul_re = fluid.layers.reshape(neg_matmul, shape=[-1, neg_num])
neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
#nce loss
label_ones = fluid.layers.fill_constant_batch_size_like(
true_logits, shape=[-1, 1], value=1.0, dtype='float32')
label_zeros = fluid.layers.fill_constant_batch_size_like(
true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')
label_ones = fluid.layers.fill_constant(
shape=[fluid.layers.shape(true_logits)[0], 1],
value=1.0,
dtype='float32')
label_zeros = fluid.layers.fill_constant(
shape=[fluid.layers.shape(true_logits)[0], neg_num],
value=0.0,
dtype='float32')
true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
label_ones)
......@@ -103,6 +113,7 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
avg_cost = fluid.layers.reduce_mean(cost)
return avg_cost, data_loader
def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
words = []
......@@ -171,10 +182,14 @@ def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
#nce loss
label_ones = fluid.layers.fill_constant_batch_size_like(
true_logits, shape=[-1, 1], value=1.0, dtype='float32')
label_zeros = fluid.layers.fill_constant_batch_size_like(
true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')
label_ones = fluid.layers.fill_constant(
shape=[fluid.layers.shape(true_logits)[0], 1],
value=1.0,
dtype='float32')
label_zeros = fluid.layers.fill_constant(
shape=[fluid.layers.shape(true_logits)[0], neg_num],
value=0.0,
dtype='float32')
true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
label_ones)
......
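The word2vec hunks apply the same fill_constant replacement to the NCE labels. As a rough sketch of where those constant tensors end up, the snippet below builds the positive and negative labels from the runtime batch size and feeds them to the sigmoid cross-entropy terms of the negative-sampling loss; the logits, shapes, and final reduction are illustrative, not copied from the repo:

    import paddle.fluid as fluid

    neg_num = 5
    # Illustrative logits; in the model above they come from matmuls of the input
    # embedding with the true and shuffled (negative) word embeddings.
    true_logits = fluid.data(name="true_logits", shape=[None, 1], dtype="float32")
    neg_logits = fluid.data(name="neg_logits", shape=[None, neg_num], dtype="float32")

    batch = fluid.layers.shape(true_logits)[0]
    label_ones = fluid.layers.fill_constant(
        shape=[batch, 1], value=1.0, dtype='float32')
    label_zeros = fluid.layers.fill_constant(
        shape=[batch, neg_num], value=0.0, dtype='float32')

    true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits, label_ones)
    neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(neg_logits, label_zeros)
    cost = fluid.layers.elementwise_add(
        fluid.layers.reduce_sum(true_xent, dim=1),
        fluid.layers.reduce_sum(neg_xent, dim=1))
    avg_cost = fluid.layers.reduce_mean(cost)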