From 5312aaa1713231c57a5a727ee7a0b7ddc7f3f57c Mon Sep 17 00:00:00 2001
From: hutuxian
Date: Mon, 17 Feb 2020 19:37:16 +0800
Subject: [PATCH] upgrade API for 1.7(din & sr-gnn) (#4301)

Upgrade API for 1.7

* PaddleRec/din
* PaddleRec/gnn
---
 PaddleRec/din/infer.py   | 15 +++++----
 PaddleRec/din/network.py | 59 ++++++++++++++++----------------
 PaddleRec/din/reader.py  |  6 ++--
 PaddleRec/din/train.py   | 15 ++++-----
 PaddleRec/gnn/infer.py   |  4 +--
 PaddleRec/gnn/network.py | 72 ++++++++++++++++------------------------
 PaddleRec/gnn/reader.py  | 16 +++++----
 PaddleRec/gnn/train.py   |  7 ++--
 8 files changed, 91 insertions(+), 103 deletions(-)

diff --git a/PaddleRec/din/infer.py b/PaddleRec/din/infer.py
index fc1484e5..7108d044 100644
--- a/PaddleRec/din/infer.py
+++ b/PaddleRec/din/infer.py
@@ -78,20 +78,21 @@ def infer():
         [inference_program, feed_target_names,
          fetch_targets] = fluid.io.load_inference_model(model_path, exe)
-        feeder = fluid.DataFeeder(
-            feed_list=feed_target_names, place=place, program=inference_program)
+        loader = fluid.io.DataLoader.from_generator(
+            feed_list=[inference_program.block(0).var(e) for e in feed_target_names], capacity=10000, iterable=True)
+        loader.set_sample_list_generator(data_reader, places=fluid.cuda_places())
 
         loss_sum = 0.0
         score = []
         count = 0
-        for data in data_reader():
+        for data in loader():
             res = exe.run(inference_program,
-                          feed=feeder.feed(data),
+                          feed=data,
                           fetch_list=fetch_targets)
             loss_sum += res[0]
-
-            for i in range(len(data)):
-                if data[i][4] > 0.5:
+            label_data = list(np.array(data[0]["label"]))
+            for i in range(len(label_data)):
+                if label_data[i] > 0.5:
                     score.append([0, 1, res[1][i]])
                 else:
                     score.append([1, 0, res[1][i]])
 
diff --git a/PaddleRec/din/network.py b/PaddleRec/din/network.py
index a65e155f..17d4ae59 100644
--- a/PaddleRec/din/network.py
+++ b/PaddleRec/din/network.py
@@ -14,7 +14,7 @@
 import paddle.fluid as fluid
 
 
-def din_attention(hist, target_expand, max_len, mask):
+def din_attention(hist, target_expand, mask):
     """activation weight"""
 
     hidden_size = hist.shape[-1]
@@ -45,9 +45,10 @@ def din_attention(hist, target_expand, max_len, mask):
     return out
 
 
-def network(item_count, cat_count, max_len):
+def network(item_count, cat_count):
     """network definition"""
 
+    seq_len = -1
     item_emb_size = 64
     cat_emb_size = 64
     is_sparse = False
@@ -56,60 +57,60 @@ def network(item_count, cat_count, max_len):
     item_emb_attr = fluid.ParamAttr(name="item_emb")
     cat_emb_attr = fluid.ParamAttr(name="cat_emb")
 
-    hist_item_seq = fluid.layers.data(
-        name="hist_item_seq", shape=[max_len, 1], dtype="int64")
-    hist_cat_seq = fluid.layers.data(
-        name="hist_cat_seq", shape=[max_len, 1], dtype="int64")
-    target_item = fluid.layers.data(
-        name="target_item", shape=[1], dtype="int64")
-    target_cat = fluid.layers.data(
-        name="target_cat", shape=[1], dtype="int64")
-    label = fluid.layers.data(
-        name="label", shape=[1], dtype="float32")
-    mask = fluid.layers.data(
-        name="mask", shape=[max_len, 1], dtype="float32")
-    target_item_seq = fluid.layers.data(
-        name="target_item_seq", shape=[max_len, 1], dtype="int64")
-    target_cat_seq = fluid.layers.data(
-        name="target_cat_seq", shape=[max_len, 1], dtype="int64", lod_level=0)
-
-    hist_item_emb = fluid.layers.embedding(
+    hist_item_seq = fluid.data(
+        name="hist_item_seq", shape=[None, seq_len], dtype="int64")
+    hist_cat_seq = fluid.data(
+        name="hist_cat_seq", shape=[None, seq_len], dtype="int64")
+    target_item = fluid.data(
+        name="target_item", shape=[None], dtype="int64")
+    target_cat = fluid.data(
+        name="target_cat", shape=[None], dtype="int64")
+    label = fluid.data(
+        name="label", shape=[None, 1], dtype="float32")
+    mask = fluid.data(
+        name="mask", shape=[None, seq_len, 1], dtype="float32")
+    target_item_seq = fluid.data(
+        name="target_item_seq", shape=[None, seq_len], dtype="int64")
+    target_cat_seq = fluid.data(
+        name="target_cat_seq", shape=[None, seq_len], dtype="int64")
+
+    hist_item_emb = fluid.embedding(
         input=hist_item_seq,
         size=[item_count, item_emb_size],
         param_attr=item_emb_attr,
         is_sparse=is_sparse)
 
-    hist_cat_emb = fluid.layers.embedding(
+    hist_cat_emb = fluid.embedding(
         input=hist_cat_seq,
         size=[cat_count, cat_emb_size],
         param_attr=cat_emb_attr,
         is_sparse=is_sparse)
 
-    target_item_emb = fluid.layers.embedding(
+    target_item_emb = fluid.embedding(
         input=target_item,
         size=[item_count, item_emb_size],
         param_attr=item_emb_attr,
         is_sparse=is_sparse)
 
-    target_cat_emb = fluid.layers.embedding(
+    target_cat_emb = fluid.embedding(
         input=target_cat,
         size=[cat_count, cat_emb_size],
         param_attr=cat_emb_attr,
         is_sparse=is_sparse)
 
-    target_item_seq_emb = fluid.layers.embedding(
+    target_item_seq_emb = fluid.embedding(
         input=target_item_seq,
         size=[item_count, item_emb_size],
         param_attr=item_emb_attr,
         is_sparse=is_sparse)
 
-    target_cat_seq_emb = fluid.layers.embedding(
+    target_cat_seq_emb = fluid.embedding(
         input=target_cat_seq,
         size=[cat_count, cat_emb_size],
         param_attr=cat_emb_attr,
         is_sparse=is_sparse)
 
-    item_b = fluid.layers.embedding(
+    item_b = fluid.embedding(
         input=target_item,
         size=[item_count, 1],
         param_attr=fluid.initializer.Constant(value=0.0))
@@ -120,7 +121,7 @@ def network(item_count, cat_count, max_len):
     target_concat = fluid.layers.concat(
         [target_item_emb, target_cat_emb], axis=1)
 
-    out = din_attention(hist_seq_concat, target_seq_concat, max_len, mask)
+    out = din_attention(hist_seq_concat, target_seq_concat, mask)
     out_fc = fluid.layers.fc(name="out_fc",
                              input=out,
                              size=item_emb_size + cat_emb_size,
@@ -137,4 +138,6 @@ def network(item_count, cat_count, max_len):
 
     loss = fluid.layers.sigmoid_cross_entropy_with_logits(x=logit, label=label)
     avg_loss = fluid.layers.mean(loss)
-    return avg_loss, fluid.layers.sigmoid(logit)
+    return avg_loss, fluid.layers.sigmoid(logit), \
+        [hist_item_seq, hist_cat_seq, target_item, \
+         target_cat, label, mask, target_item_seq, target_cat_seq]
diff --git a/PaddleRec/din/reader.py b/PaddleRec/din/reader.py
index a50a9089..07d1c33c 100644
--- a/PaddleRec/din/reader.py
+++ b/PaddleRec/din/reader.py
@@ -20,7 +20,7 @@ import pickle
 
 
 def pad_batch_data(input, max_len):
     res = np.array([x + [0] * (max_len - len(x)) for x in input])
-    res = res.astype("int64").reshape([-1, max_len, 1])
+    res = res.astype("int64").reshape([-1, max_len])
     return res
 
@@ -34,10 +34,10 @@ def make_data(b):
         [-1, max_len, 1])
     target_item_seq = np.array(
         [[x[2]] * max_len for x in b]).astype("int64").reshape(
-            [-1, max_len, 1])
+            [-1, max_len])
     target_cat_seq = np.array(
         [[x[3]] * max_len for x in b]).astype("int64").reshape(
-            [-1, max_len, 1])
+            [-1, max_len])
     res = []
     for i in range(len(b)):
         res.append([
diff --git a/PaddleRec/din/train.py b/PaddleRec/din/train.py
index dbf0e77e..1be7c6ca 100644
--- a/PaddleRec/din/train.py
+++ b/PaddleRec/din/train.py
@@ -78,7 +78,7 @@ def train():
             args.num_devices)
     logger.info("reading data completes")
 
-    avg_cost, pred = network.network(item_count, cat_count, max_len)
+    avg_cost, pred, feed_list = network.network(item_count, cat_count)
     fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
         clip_norm=5.0))
     base_lr = args.base_lr
@@ -94,12 +94,9 @@ def train():
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
 
-    feeder = fluid.DataFeeder(
-        feed_list=[
-            "hist_item_seq", "hist_cat_seq", "target_item", "target_cat",
-            "label", "mask", "target_item_seq", "target_cat_seq"
-        ],
-        place=place)
+    loader = fluid.io.DataLoader.from_generator(
+        feed_list=feed_list, capacity=10000, iterable=True)
+    loader.set_sample_list_generator(data_reader, places=fluid.cuda_places())
     if use_parallel:
         train_exe = fluid.ParallelExecutor(
             use_cuda=use_cuda, loss_name=avg_cost.name)
@@ -117,9 +114,9 @@ def train():
     loss_sum = 0.0
     for id in range(epoch_num):
         epoch = id + 1
-        for data in data_reader():
+        for data in loader():
             global_step += 1
-            results = train_exe.run(feed=feeder.feed(data),
+            results = train_exe.run(feed=data,
                                     fetch_list=[avg_cost.name, pred.name],
                                     return_numpy=True)
             loss_sum += results[0].mean()
diff --git a/PaddleRec/gnn/infer.py b/PaddleRec/gnn/infer.py
index 90c916dc..f8d1f111 100644
--- a/PaddleRec/gnn/infer.py
+++ b/PaddleRec/gnn/infer.py
@@ -55,7 +55,7 @@ def infer(args):
     test_data = reader.Data(args.test_path, False)
     place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
-    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, args.step)
+    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, args.step, batch_size)
     exe.run(fluid.default_startup_program())
     infer_program = fluid.default_main_program().clone(for_test=True)
 
@@ -70,7 +70,7 @@ def infer(args):
     loss_sum = 0.0
     acc_sum = 0.0
     count = 0
-    py_reader.decorate_paddle_reader(test_data.reader(batch_size, batch_size*20, False))
+    py_reader.set_sample_list_generator(test_data.reader(batch_size, batch_size*20, False))
     py_reader.start()
     try:
         while True:
diff --git a/PaddleRec/gnn/network.py b/PaddleRec/gnn/network.py
index d445dbe9..737f646f 100644
--- a/PaddleRec/gnn/network.py
+++ b/PaddleRec/gnn/network.py
@@ -19,57 +19,53 @@ import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 
 
-def network(items_num, hidden_size, step):
+def network(items_num, hidden_size, step, bs):
     stdv = 1.0 / math.sqrt(hidden_size)
 
-    items = layers.data(
+    items = fluid.data(
         name="items",
-        shape=[1, 1],
-        dtype="int64") #[batch_size, uniq_max, 1]
-    seq_index = layers.data(
+        shape=[bs, -1],
+        dtype="int64") #[batch_size, uniq_max]
+    seq_index = fluid.data(
         name="seq_index",
-        shape=[1],
-        dtype="int32") #[batch_size, seq_max]
-    last_index = layers.data(
+        shape=[bs, -1, 2],
+        dtype="int32") #[batch_size, seq_max, 2]
+    last_index = fluid.data(
         name="last_index",
-        shape=[1],
-        dtype="int32") #[batch_size, 1]
-    adj_in = layers.data(
+        shape=[bs, 2],
+        dtype="int32") #[batch_size, 2]
+    adj_in = fluid.data(
         name="adj_in",
-        shape=[1,1],
+        shape=[bs, -1, -1],
         dtype="float32") #[batch_size, seq_max, seq_max]
-    adj_out = layers.data(
+    adj_out = fluid.data(
         name="adj_out",
-        shape=[1,1],
+        shape=[bs, -1, -1],
         dtype="float32") #[batch_size, seq_max, seq_max]
-    mask = layers.data(
+    mask = fluid.data(
         name="mask",
-        shape=[1, 1],
+        shape=[bs, -1, 1],
         dtype="float32") #[batch_size, seq_max, 1]
-    label = layers.data(
+    label = fluid.data(
         name="label",
-        shape=[1],
+        shape=[bs, 1],
         dtype="int64") #[batch_size, 1]
 
     datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
-    py_reader = fluid.layers.create_py_reader_by_data(
-        capacity=256, feed_list=datas, name='py_reader', use_double_buffer=True)
-    feed_datas = fluid.layers.read_file(py_reader)
-    items, seq_index, last_index, adj_in, adj_out, mask, label = feed_datas
+    py_reader = fluid.io.DataLoader.from_generator(capacity=256, feed_list=datas, iterable=False)
+    feed_datas = datas
 
-    items_emb = layers.embedding(
+    items_emb = fluid.embedding(
         input=items,
         param_attr=fluid.ParamAttr(
             name="emb",
             initializer=fluid.initializer.Uniform(
                 low=-stdv, high=stdv)),
         size=[items_num, hidden_size]) #[batch_size, uniq_max, h]
-    items_emb_shape = layers.shape(items_emb)
 
     pre_state = items_emb
     for i in range(step):
-        pre_state = layers.reshape(
-            x=pre_state, shape=[-1, 1, hidden_size], actual_shape=items_emb_shape)
+        pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
         state_in = layers.fc(
             input=pre_state,
             name="state_in",
@@ -104,24 +100,12 @@ def network(items_num, hidden_size, step):
             bias_attr=False)
         pre_state, _, _ = fluid.layers.gru_unit(
             input=gru_fc,
-            hidden=layers.reshape(
-                x=pre_state, shape=[-1, hidden_size]),
+            hidden=layers.reshape(x=pre_state, shape=[-1, hidden_size]),
             size=3 * hidden_size)
 
-    final_state = pre_state #[batch_size * uniq_max, h]
-
-    seq_origin_shape = layers.assign(np.array([0,0,hidden_size-1]).astype("int32"))
-    seq_origin_shape += layers.shape(layers.unsqueeze(seq_index,[2])) #value: [batch_size, seq_max, h]
-    seq_origin_shape.stop_gradient = True
-
-    seq_index = layers.reshape(seq_index, shape=[-1])
-    seq = layers.gather(final_state, seq_index) #[batch_size * seq_max, h]
-    last = layers.gather(final_state, last_index) #[batch_size, h]
-
-    seq = layers.reshape(
-        seq, shape=[-1, 1, hidden_size], actual_shape=seq_origin_shape) #[batch_size, seq_max, h]
-    last = layers.reshape(
-        last, shape=[-1, hidden_size]) #[batch_size, h]
+    final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
+    seq = layers.gather_nd(final_state, seq_index)
+    last = layers.gather_nd(final_state, last_index)
 
     seq_fc = layers.fc(
         input=seq,
@@ -184,13 +168,13 @@ def network(items_num, hidden_size, step):
                 low=-stdv, high=stdv))) #[batch_size, h]
 
     all_vocab = layers.create_global_var(
-        shape=[items_num - 1, 1],
+        shape=[items_num - 1],
         value=0,
         dtype="int64",
         persistable=True,
         name="all_vocab")
 
-    all_emb = layers.embedding(
+    all_emb = fluid.embedding(
         input=all_vocab,
         param_attr=fluid.ParamAttr(
             name="emb",
diff --git a/PaddleRec/gnn/reader.py b/PaddleRec/gnn/reader.py
index effcc242..e1adb8f3 100644
--- a/PaddleRec/gnn/reader.py
+++ b/PaddleRec/gnn/reader.py
@@ -64,19 +64,19 @@ class Data():
             adj_out.append(np.divide(adj.transpose(), u_deg_out).transpose())
 
             seq_index.append(
-                [np.where(node == i)[0][0] + id * max_uniq_len for i in e[0]])
+                [[id, np.where(node == i)[0][0]] for i in e[0]])
             last_index.append(
-                np.where(node == e[0][last_id[id]])[0][0] + id * max_uniq_len)
+                [id, np.where(node == e[0][last_id[id]])[0][0]])
             label.append(e[1] - 1)
             mask.append([[1] * (last_id[id] + 1) + [0] *
                          (max_seq_len - last_id[id] - 1)])
             id += 1
 
-        items = np.array(items).astype("int64").reshape((batch_size, -1, 1))
+        items = np.array(items).astype("int64").reshape((batch_size, -1))
         seq_index = np.array(seq_index).astype("int32").reshape(
-            (batch_size, -1))
+            (batch_size, -1, 2))
         last_index = np.array(last_index).astype("int32").reshape(
-            (batch_size))
+            (batch_size, 2))
         adj_in = np.array(adj_in).astype("float32").reshape(
             (batch_size, max_uniq_len, max_uniq_len))
         adj_out = np.array(adj_out).astype("float32").reshape(
@@ -110,8 +110,10 @@ class Data():
                     cur_batch = remain_data[i:i + batch_size]
                     yield self.make_data(cur_batch, batch_size)
                 else:
-                    cur_batch = remain_data[i:]
-                    yield self.make_data(cur_batch, group_remain % batch_size)
+                    # Due to fixed batch_size, discard the remaining ins
+                    return
+                    #cur_batch = remain_data[i:]
+                    #yield self.make_data(cur_batch, group_remain % batch_size)
 
         return _reader
 
diff --git a/PaddleRec/gnn/train.py b/PaddleRec/gnn/train.py
index 790d23d9..a4a1898e 100644
--- a/PaddleRec/gnn/train.py
+++ b/PaddleRec/gnn/train.py
@@ -72,7 +72,7 @@ def train():
    batch_size = args.batch_size
    items_num = reader.read_config(args.config_path)
    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size,
-                                           args.step)
+                                           args.step, batch_size)
    data_reader = reader.Data(args.train_path, True)
    logger.info("load data complete")
 
@@ -96,7 +96,7 @@ def train():
 
    all_vocab = fluid.global_scope().var("all_vocab").get_tensor()
    all_vocab.set(
-        np.arange(1, items_num).astype("int64").reshape((-1, 1)), place)
+        np.arange(1, items_num).astype("int64").reshape((-1)), place)
 
    feed_list = [e.name for e in feed_datas]
 
@@ -115,7 +115,8 @@ def train():
    acc_sum = 0.0
    global_step = 0
    PRINT_STEP = 500
-    py_reader.decorate_paddle_reader(data_reader.reader(batch_size, batch_size * 20, True))
+    #py_reader.decorate_paddle_reader(data_reader.reader(batch_size, batch_size * 20, True))
+    py_reader.set_sample_list_generator(data_reader.reader(batch_size, batch_size * 20, True))
    for i in range(args.epoch_num):
        epoch_sum = []
        py_reader.start()
--
GitLab
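
The change that repeats across din/infer.py and din/train.py is the move from fluid.DataFeeder to an iterable fluid.io.DataLoader: inputs are declared with fluid.data (batch dimension left as None), the variables go to DataLoader.from_generator, and the loader itself yields ready-made feed data, so the feeder.feed(data) step disappears. Reduced to its bare bones the pattern looks like the sketch below, written against Paddle Fluid 1.7; the one-feature network and sample_reader are illustrative stand-ins, not code from this patch.

    import numpy as np
    import paddle.fluid as fluid

    # fluid.data replaces fluid.layers.data; None marks the batch dimension.
    x = fluid.data(name="x", shape=[None, 1], dtype="float32")
    y = fluid.layers.fc(input=x, size=1)

    def sample_reader():
        # A sample-list generator yields one batch per iteration: a list of
        # samples, each sample a list with one entry per feed variable.
        for i in range(8):
            yield [[np.array([i], dtype="float32")]]

    place = fluid.CPUPlace()
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[x], capacity=16, iterable=True)
    loader.set_sample_list_generator(sample_reader, places=[place])

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # An iterable DataLoader yields feed data directly (one dict per place),
    # which is why the patched files pass it straight to exe.run as feed=data.
    for data in loader():
        out, = exe.run(feed=data, fetch_list=[y])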
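The gnn side keeps its py_reader-style control flow instead: create_py_reader_by_data becomes DataLoader.from_generator with iterable=False, and the loop is still driven by start()/reset(), with fluid.core.EOFException marking the end of an epoch, as gnn/train.py and gnn/infer.py do. A comparable sketch under the same assumptions (toy reader and input, Fluid 1.7):

    import numpy as np
    import paddle.fluid as fluid

    x = fluid.data(name="x", shape=[None, 1], dtype="float32")
    # iterable=False binds the loader to the program, like the old py_reader.
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[x], capacity=256, iterable=False)

    def sample_reader():
        for i in range(32):
            yield [[np.array([i], dtype="float32")]]

    loader.set_sample_list_generator(sample_reader)  # no places, as in the patch

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    for epoch in range(2):
        loader.start()                   # start feeding the bound program
        try:
            while True:
                exe.run(fetch_list=[x])  # no feed=; data comes from the loader
        except fluid.core.EOFException:
            loader.reset()               # rearm the loader for the next epoch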
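The other substantive gnn change is index handling. The old reader flattened each session's node positions into indices of a [batch * uniq_max, h] tensor (col + id * max_uniq_len) for layers.gather; the new reader emits [row, col] pairs so layers.gather_nd can address the [batch, uniq_max, h] state directly, which is what lets the fixed bs dimension and the plain reshape to [bs, -1, hidden_size] replace the actual_shape bookkeeping. The two addressings pick the same rows; a small numpy check with hypothetical shapes:

    import numpy as np

    batch, uniq_max, h = 2, 3, 4
    final_state = np.arange(batch * uniq_max * h).reshape(batch, uniq_max, h)

    # Old addressing: flatten, then index with col + row * uniq_max.
    flat = final_state.reshape(-1, h)
    old = np.stack([flat[1 + 0 * uniq_max], flat[2 + 1 * uniq_max]])

    # New addressing: keep the 3-D tensor and use [row, col] pairs, which is
    # how layers.gather_nd consumes the int32 seq_index/last_index arrays now.
    index = np.array([[0, 1], [1, 2]])
    new = final_state[index[:, 0], index[:, 1]]

    assert (old == new).all()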