From bc2499156d24fe77da4117866ee4fe6874da86d5 Mon Sep 17 00:00:00 2001
From: zhang wenhui
Date: Wed, 13 May 2020 16:37:01 +0800
Subject: [PATCH] update api 1.8 (#4615)

---
 PaddleRec/ctr/dcn/network.py | 5 ++--
 PaddleRec/ctr/din/cluster_train.py | 18 +++++++-------
 PaddleRec/ctr/din/train.py | 7 +++---
 PaddleRec/multiview_simnet/nets.py | 11 +++++----
 PaddleRec/ssr/cluster_train.py | 8 +++---
 PaddleRec/ssr/nets.py | 12 ++++++---
 PaddleRec/tagspace/net.py | 12 ++++++---
 PaddleRec/word2vec/net.py | 39 +++++++++++++++++++++---------
 8 files changed, 69 insertions(+), 43 deletions(-)

diff --git a/PaddleRec/ctr/dcn/network.py b/PaddleRec/ctr/dcn/network.py
index ffa399ed..8dd65038 100644
--- a/PaddleRec/ctr/dcn/network.py
+++ b/PaddleRec/ctr/dcn/network.py
@@ -76,11 +76,10 @@ class DCN(object):
 
     def backward(self, lr):
         p_g_clip = fluid.backward.append_backward(loss=self.loss)
-        fluid.clip.set_gradient_clip(
-            fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_by_norm))
+        clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=self.clip_by_norm)
 
         p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
 
-        optimizer = fluid.optimizer.Adam(learning_rate=lr)
+        optimizer = fluid.optimizer.Adam(learning_rate=lr, grad_clip=clip)
         # params_grads = optimizer.backward(self.loss)
         optimizer.apply_gradients(p_g_clip)
diff --git a/PaddleRec/ctr/din/cluster_train.py b/PaddleRec/ctr/din/cluster_train.py
index 6b327236..8cd7e15c 100644
--- a/PaddleRec/ctr/din/cluster_train.py
+++ b/PaddleRec/ctr/din/cluster_train.py
@@ -86,7 +86,6 @@ def train():
     logger.info("reading data completes")
 
     avg_cost, pred = network.network(item_count, cat_count, 433)
-    #fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
     base_lr = args.base_lr
     boundaries = [410000]
     values = [base_lr, 0.2]
@@ -101,12 +100,13 @@ def train():
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
-    feeder = fluid.DataFeeder(
-        feed_list=[
-            "hist_item_seq", "hist_cat_seq", "target_item", "target_cat",
-            "label", "mask", "target_item_seq", "target_cat_seq"
-        ],
-        place=place)
+    feed_list = [
+        "hist_item_seq", "hist_cat_seq", "target_item", "target_cat",
+        "label", "mask", "target_item_seq", "target_cat_seq"
+    ]
+    loader = fluid.io.DataLoader.from_generator(
+        feed_list=feed_list, capacity=10000, iterable=True)
+    loader.set_sample_list_generator(data_reader, places=place)
     if use_parallel:
         train_exe = fluid.ParallelExecutor(
             use_cuda=use_cuda,
@@ -122,10 +122,10 @@ def train():
     loss_sum = 0.0
     for id in range(epoch_num):
         epoch = id + 1
-        for data in data_reader():
+        for data in loader():
             global_step += 1
             results = train_exe.run(main_program,
-                                    feed=feeder.feed(data),
+                                    feed=data,
                                     fetch_list=[avg_cost.name, pred.name],
                                     return_numpy=True)
             loss_sum += results[0].mean()
diff --git a/PaddleRec/ctr/din/train.py b/PaddleRec/ctr/din/train.py
index a5197211..4461eb67 100644
--- a/PaddleRec/ctr/din/train.py
+++ b/PaddleRec/ctr/din/train.py
@@ -92,14 +92,15 @@ def train():
     logger.info("reading data completes")
 
     avg_cost, pred, feed_list = network.network(item_count, cat_count)
-    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
-        clip_norm=5.0))
+
+    clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
     base_lr = args.base_lr
     boundaries = [410000]
     values = [base_lr, 0.2]
     sgd_optimizer = fluid.optimizer.SGD(
         learning_rate=fluid.layers.piecewise_decay(
-            boundaries=boundaries, values=values))
+            boundaries=boundaries, values=values),
+        grad_clip=clip)
     sgd_optimizer.minimize(avg_cost)
 
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
diff --git a/PaddleRec/multiview_simnet/nets.py b/PaddleRec/multiview_simnet/nets.py
index 104101e2..6a228203 100644
--- a/PaddleRec/multiview_simnet/nets.py
+++ b/PaddleRec/multiview_simnet/nets.py
@@ -190,9 +190,8 @@ class MultiviewSimnet(object):
 
         # pairwise hinge_loss
        loss_part1 = fluid.layers.elementwise_sub(
-            tensor.fill_constant_batch_size_like(
-                input=cos_pos,
-                shape=[-1, 1],
+            fluid.layers.fill_constant(
+                shape=[fluid.layers.shape(cos_pos)[0], 1],
                 value=self.margin,
                 dtype='float32'),
             cos_pos)
@@ -200,8 +199,10 @@ class MultiviewSimnet(object):
         loss_part2 = fluid.layers.elementwise_add(loss_part1, cos_neg)
 
         loss_part3 = fluid.layers.elementwise_max(
-            tensor.fill_constant_batch_size_like(
-                input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
+            fluid.layers.fill_constant(
+                shape=[fluid.layers.shape(loss_part2)[0], 1],
+                value=0.0,
+                dtype='float32'),
             loss_part2)
         avg_cost = fluid.layers.mean(loss_part3)
diff --git a/PaddleRec/ssr/cluster_train.py b/PaddleRec/ssr/cluster_train.py
index 7cbe7ab4..0b76934f 100644
--- a/PaddleRec/ssr/cluster_train.py
+++ b/PaddleRec/ssr/cluster_train.py
@@ -91,9 +91,11 @@ def get_cards(args):
 def train_loop(main_program, avg_cost, acc, train_input_data, place, args,
                train_reader):
     data_list = [var.name for var in train_input_data]
-    feeder = fluid.DataFeeder(feed_list=data_list, place=place)
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
+    loader = fluid.io.DataLoader.from_generator(
+        feed_list=train_input_data, capacity=10000, iterable=True)
+    loader.set_sample_list_generator(train_reader, places=place)
     train_exe = exe
 
     total_time = 0.0
@@ -103,10 +105,10 @@ def train_loop(main_program, avg_cost, acc, train_input_data, place, args,
         print("epoch_%d start" % epoch_idx)
         t0 = time.time()
         i = 0
-        for batch_id, data in enumerate(train_reader()):
+        for batch_id, data in enumerate(loader()):
             i += 1
             loss_val, correct_val = train_exe.run(
-                feed=feeder.feed(data), fetch_list=[avg_cost.name, acc.name])
+                feed=data, fetch_list=[avg_cost.name, acc.name])
             ce_info.append(float(np.mean(correct_val)) / args.batch_size)
             if i % args.print_batch == 0:
                 logger.info(
diff --git a/PaddleRec/ssr/nets.py b/PaddleRec/ssr/nets.py
index 7b78adae..3026562d 100644
--- a/PaddleRec/ssr/nets.py
+++ b/PaddleRec/ssr/nets.py
@@ -57,13 +57,17 @@ class PairwiseHingeLoss(object):
 
     def forward(self, pos, neg):
         loss_part1 = fluid.layers.elementwise_sub(
-            tensor.fill_constant_batch_size_like(
-                input=pos, shape=[-1, 1], value=self.margin, dtype='float32'),
+            fluid.layers.fill_constant(
+                shape=[fluid.layers.shape(pos)[0], 1],
+                value=self.margin,
+                dtype='float32'),
             pos)
         loss_part2 = fluid.layers.elementwise_add(loss_part1, neg)
         loss_part3 = fluid.layers.elementwise_max(
-            tensor.fill_constant_batch_size_like(
-                input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
+            fluid.layers.fill_constant(
+                shape=[fluid.layers.shape(loss_part2)[0], 1],
+                value=0.0,
+                dtype='float32'),
             loss_part2)
         return loss_part3
diff --git a/PaddleRec/tagspace/net.py b/PaddleRec/tagspace/net.py
index 479d6620..3a8287e7 100644
--- a/PaddleRec/tagspace/net.py
+++ b/PaddleRec/tagspace/net.py
@@ -46,13 +46,17 @@ def network(vocab_text_size,
     cos_neg = nn.reduce_max(cos_neg_all, dim=1, keep_dim=True)
     #calculate hinge loss
     loss_part1 = nn.elementwise_sub(
-        tensor.fill_constant_batch_size_like(
-            input=cos_pos, shape=[-1, 1], value=margin, dtype='float32'),
+        fluid.layers.fill_constant(
+            shape=[fluid.layers.shape(cos_pos)[0], 1],
+            value=margin,
+            dtype='float32'),
         cos_pos)
     loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
     loss_part3 = nn.elementwise_max(
-        tensor.fill_constant_batch_size_like(
-            input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
+        fluid.layers.fill_constant(
+            shape=[fluid.layers.shape(loss_part2)[0], 1],
+            value=0.0,
+            dtype='float32'),
         loss_part2)
     avg_cost = nn.mean(loss_part3)
     less = tensor.cast(cf.less_than(cos_neg, cos_pos), dtype='float32')
diff --git a/PaddleRec/word2vec/net.py b/PaddleRec/word2vec/net.py
index 3e2ab33b..c71e1bb5 100644
--- a/PaddleRec/word2vec/net.py
+++ b/PaddleRec/word2vec/net.py
@@ -20,7 +20,10 @@ import numpy as np
 import paddle.fluid as fluid
 
 
-def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False, neg_num=5):
+def skip_gram_word2vec_shuffle_batch(dict_size,
+                                     embedding_size,
+                                     is_sparse=False,
+                                     neg_num=5):
 
     words = []
     input_word = fluid.data(name="input_word", shape=[None, 1], dtype='int64')
@@ -61,7 +64,8 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
     # add shuffle_batch after embedding.
     neg_emb_w_list = []
     for i in range(neg_num):
-        neg_emb_w_list.append(fluid.contrib.layers.shuffle_batch(true_emb_w))  # shuffle true_word
+        neg_emb_w_list.append(
+            fluid.contrib.layers.shuffle_batch(true_emb_w))  # shuffle true_word
     neg_emb_w = fluid.layers.concat(neg_emb_w_list, axis=0)
 
     neg_emb_w_re = fluid.layers.reshape(
@@ -69,7 +73,8 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
 
     neg_emb_b_list = []
     for i in range(neg_num):
-        neg_emb_b_list.append(fluid.contrib.layers.shuffle_batch(true_emb_b))  # shuffle true_word
+        neg_emb_b_list.append(
+            fluid.contrib.layers.shuffle_batch(true_emb_b))  # shuffle true_word
     neg_emb_b = fluid.layers.concat(neg_emb_b_list, axis=0)
 
     neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])
@@ -81,15 +86,20 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
         true_emb_b)
 
     input_emb_re = fluid.layers.reshape(
         input_emb, shape=[-1, 1, embedding_size])
-    neg_matmul = fluid.layers.matmul(input_emb_re, neg_emb_w_re, transpose_y=True)
+    neg_matmul = fluid.layers.matmul(
+        input_emb_re, neg_emb_w_re, transpose_y=True)
     neg_matmul_re = fluid.layers.reshape(neg_matmul, shape=[-1, neg_num])
     neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
     #nce loss
-    label_ones = fluid.layers.fill_constant_batch_size_like(
-        true_logits, shape=[-1, 1], value=1.0, dtype='float32')
-    label_zeros = fluid.layers.fill_constant_batch_size_like(
-        true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')
+    label_ones = fluid.layers.fill_constant(
+        shape=[fluid.layers.shape(true_logits)[0], 1],
+        value=1.0,
+        dtype='float32')
+    label_zeros = fluid.layers.fill_constant(
+        shape=[fluid.layers.shape(true_logits)[0], neg_num],
+        value=0.0,
+        dtype='float32')
 
     true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
                                                                label_ones)
@@ -103,6 +113,7 @@ def skip_gram_word2vec_shuffle_batch(dict_size, embedding_size, is_sparse=False,
     avg_cost = fluid.layers.reduce_mean(cost)
     return avg_cost, data_loader
 
+
 def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
 
     words = []
@@ -171,10 +182,14 @@ def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
     neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
     #nce loss
-    label_ones = fluid.layers.fill_constant_batch_size_like(
-        true_logits, shape=[-1, 1], value=1.0, dtype='float32')
-    label_zeros = fluid.layers.fill_constant_batch_size_like(
-        true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')
+    label_ones = fluid.layers.fill_constant(
+        shape=[fluid.layers.shape(true_logits)[0], 1],
+        value=1.0,
+        dtype='float32')
+    label_zeros = fluid.layers.fill_constant(
+        shape=[fluid.layers.shape(true_logits)[0], neg_num],
+        value=0.0,
+        dtype='float32')
 
     true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
                                                                label_ones)
--
GitLab