From ce853f675069f921fbec5e11f5b495255456c9bf Mon Sep 17 00:00:00 2001
From: hutuxian
Date: Tue, 9 Jul 2019 16:29:13 +0800
Subject: [PATCH] SR-GNN: some optimization (#2676)

use all data
network: support variable batch_size
add some shape comment
infer: use pyreader
---
 PaddleRec/gnn/infer.py   | 62 +++++++++++++++++++++--------------
 PaddleRec/gnn/network.py | 71 ++++++++++++++++++++--------------------
 PaddleRec/gnn/reader.py  | 11 ++++---
 PaddleRec/gnn/train.py   |  2 +-
 4 files changed, 81 insertions(+), 65 deletions(-)

diff --git a/PaddleRec/gnn/infer.py b/PaddleRec/gnn/infer.py
index ee196702..6f935745 100644
--- a/PaddleRec/gnn/infer.py
+++ b/PaddleRec/gnn/infer.py
@@ -19,6 +19,7 @@ import os
 import paddle
 import paddle.fluid as fluid
 import reader
+import network
 
 logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger("fluid")
@@ -31,6 +32,8 @@ def parse_args():
         '--model_path', type=str, default='./saved_model/', help="path of model parameters")
     parser.add_argument(
         '--test_path', type=str, default='./data/diginetica/test.txt', help='dir of test file')
+    parser.add_argument(
+        '--config_path', type=str, default='./data/diginetica/config.txt', help='dir of config')
     parser.add_argument(
         '--use_cuda', type=int, default=1, help='whether to use gpu')
     parser.add_argument(
@@ -39,40 +42,51 @@ def parse_args():
         '--start_index', type=int, default='0', help='start index')
     parser.add_argument(
         '--last_index', type=int, default='10', help='end index')
+    parser.add_argument(
+        '--hidden_size', type=int, default=100, help='hidden state size')
+    parser.add_argument(
+        '--step', type=int, default=1, help='gnn propogation steps')
     return parser.parse_args()
 
 
-def infer(epoch_num):
-    args = parse_args()
+def infer(args):
     batch_size = args.batch_size
+    items_num = reader.read_config(args.config_path)
     test_data = reader.Data(args.test_path, False)
     place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
     exe = fluid.Executor(place)
+    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size, args.step)
+    exe.run(fluid.default_startup_program())
+    infer_program = fluid.default_main_program().clone(for_test=True)
 
-    model_path = args.model_path + "epoch_" + str(epoch_num)
-    try:
-        [infer_program, feed_names, fetch_targets] = fluid.io.load_inference_model(
-            model_path, exe)
-        feeder = fluid.DataFeeder(
-            feed_list=feed_names, place=place, program=infer_program)
+    for epoch_num in range(args.start_index, args.last_index + 1):
+        model_path = args.model_path + "epoch_" + str(epoch_num)
+        try:
+            if not os.path.exists(model_path):
+                raise ValueError()
+            fluid.io.load_persistables(executor=exe, dirname=model_path,
+                                       main_program=infer_program)
 
-        loss_sum = 0.0
-        acc_sum = 0.0
-        count = 0
-        for data in test_data.reader(batch_size, batch_size, False)():
-            res = exe.run(infer_program,
-                          feed=feeder.feed(data),
-                          fetch_list=fetch_targets)
-            loss_sum += res[0]
-            acc_sum += res[1]
-            count += 1
-        logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
-                    (loss_sum / count, acc_sum / count))
-    except ValueError as e:
-        logger.info("TEST --> error: there is no model in " + model_path)
+            loss_sum = 0.0
+            acc_sum = 0.0
+            count = 0
+            py_reader.decorate_paddle_reader(test_data.reader(batch_size, batch_size*20, False))
+            py_reader.start()
+            try:
+                while True:
+                    res = exe.run(infer_program,
+                                  fetch_list=[loss.name, acc.name], use_program_cache=True)
+                    loss_sum += res[0]
+                    acc_sum += res[1]
+                    count += 1
+            except fluid.core.EOFException:
+                py_reader.reset()
+
+            logger.info("TEST --> loss: %.4lf, Recall@20: %.4lf" %
+                        (loss_sum / count, acc_sum / count))
+        except ValueError as e:
+            logger.info("TEST --> error: there is no model in " + model_path)
 
 
 if __name__ == "__main__":
     args = parse_args()
-    for index in range(args.start_index, args.last_index + 1):
-        infer(index)
+    infer(args)
diff --git a/PaddleRec/gnn/network.py b/PaddleRec/gnn/network.py
index 44d53f94..d445dbe9 100644
--- a/PaddleRec/gnn/network.py
+++ b/PaddleRec/gnn/network.py
@@ -19,44 +19,37 @@ import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 
 
-def network(batch_size, items_num, hidden_size, step):
+def network(items_num, hidden_size, step):
     stdv = 1.0 / math.sqrt(hidden_size)
 
     items = layers.data(
         name="items",
-        shape=[batch_size, items_num, 1],
-        dtype="int64",
-        append_batch_size=False) #[bs, uniq_max, 1]
+        shape=[1, 1],
+        dtype="int64") #[batch_size, uniq_max, 1]
     seq_index = layers.data(
         name="seq_index",
-        shape=[batch_size, items_num],
-        dtype="int32",
-        append_batch_size=False) #[-1(seq_max)*batch_size, 1]
+        shape=[1],
+        dtype="int32") #[batch_size, seq_max]
     last_index = layers.data(
         name="last_index",
-        shape=[batch_size],
-        dtype="int32",
-        append_batch_size=False) #[batch_size, 1]
+        shape=[1],
+        dtype="int32") #[batch_size, 1]
     adj_in = layers.data(
         name="adj_in",
-        shape=[batch_size, items_num, items_num],
-        dtype="float32",
-        append_batch_size=False)
+        shape=[1,1],
+        dtype="float32") #[batch_size, seq_max, seq_max]
     adj_out = layers.data(
         name="adj_out",
-        shape=[batch_size, items_num, items_num],
-        dtype="float32",
-        append_batch_size=False)
+        shape=[1,1],
+        dtype="float32") #[batch_size, seq_max, seq_max]
     mask = layers.data(
         name="mask",
-        shape=[batch_size, -1, 1],
-        dtype="float32",
-        append_batch_size=False)
+        shape=[1, 1],
+        dtype="float32") #[batch_size, seq_max, 1]
     label = layers.data(
         name="label",
-        shape=[batch_size, 1],
-        dtype="int64",
-        append_batch_size=False)
+        shape=[1],
+        dtype="int64") #[batch_size, 1]
 
     datas = [items, seq_index, last_index, adj_in, adj_out, mask, label]
     py_reader = fluid.layers.create_py_reader_by_data(
@@ -71,11 +64,12 @@
         initializer=fluid.initializer.Uniform(
             low=-stdv, high=stdv)),
         size=[items_num, hidden_size]) #[batch_size, uniq_max, h]
+    items_emb_shape = layers.shape(items_emb)
 
     pre_state = items_emb
     for i in range(step):
         pre_state = layers.reshape(
-            x=pre_state, shape=[batch_size, -1, hidden_size])
+            x=pre_state, shape=[-1, 1, hidden_size], actual_shape=items_emb_shape)
         state_in = layers.fc(
             input=pre_state,
             name="state_in",
@@ -114,15 +108,20 @@
                 x=pre_state, shape=[-1, hidden_size]),
             size=3 * hidden_size)
 
-    final_state = pre_state
+    final_state = pre_state #[batch_size * uniq_max, h]
+
+    seq_origin_shape = layers.assign(np.array([0,0,hidden_size-1]).astype("int32"))
+    seq_origin_shape += layers.shape(layers.unsqueeze(seq_index,[2])) #value: [batch_size, seq_max, h]
+    seq_origin_shape.stop_gradient = True
+
     seq_index = layers.reshape(seq_index, shape=[-1])
-    seq = layers.gather(final_state, seq_index) #[batch_size*-1(seq_max), h]
+    seq = layers.gather(final_state, seq_index) #[batch_size * seq_max, h]
     last = layers.gather(final_state, last_index) #[batch_size, h]
 
     seq = layers.reshape(
-        seq, shape=[batch_size, -1, hidden_size]) #[batch_size, -1(seq_max), h]
+        seq, shape=[-1, 1, hidden_size], actual_shape=seq_origin_shape) #[batch_size, seq_max, h]
     last = layers.reshape(
-        last, shape=[batch_size, hidden_size]) #[batch_size, h]
+        last, shape=[-1, hidden_size]) #[batch_size, h]
 
     seq_fc = layers.fc(
         input=seq,
@@ -133,7 +132,7 @@
         num_flatten_dims=2,
         param_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Uniform(
-                low=-stdv, high=stdv))) #[batch_size, -1(seq_max), h]
+                low=-stdv, high=stdv))) #[batch_size, seq_max, h]
     last_fc = layers.fc(
         input=last,
         name="last_fc",
@@ -146,18 +145,18 @@
             low=-stdv, high=stdv))) #[bathc_size, h]
 
     seq_fc_t = layers.transpose(
-        seq_fc, perm=[1, 0, 2]) #[-1(seq_max), batch_size, h]
+        seq_fc, perm=[1, 0, 2]) #[seq_max, batch_size, h]
     add = layers.elementwise_add(
-        seq_fc_t, last_fc) #[-1(seq_max), batch_size, h]
+        seq_fc_t, last_fc) #[seq_max, batch_size, h]
     b = layers.create_parameter(
         shape=[hidden_size],
         dtype='float32',
         default_initializer=fluid.initializer.Constant(value=0.0)) #[h]
-    add = layers.elementwise_add(add, b) #[-1(seq_max), batch_size, h]
+    add = layers.elementwise_add(add, b) #[seq_max, batch_size, h]
 
-    add_sigmoid = layers.sigmoid(add) #[-1(seq_max), batch_size, h]
+    add_sigmoid = layers.sigmoid(add) #[seq_max, batch_size, h]
     add_sigmoid = layers.transpose(
-        add_sigmoid, perm=[1, 0, 2]) #[batch_size, -1(seq_max), h]
+        add_sigmoid, perm=[1, 0, 2]) #[batch_size, seq_max, h]
 
     weight = layers.fc(
         input=add_sigmoid,
@@ -168,10 +167,10 @@
         bias_attr=False,
         param_attr=fluid.ParamAttr(
             initializer=fluid.initializer.Uniform(
-                low=-stdv, high=stdv))) #[batch_size, -1, 1]
+                low=-stdv, high=stdv))) #[batch_size, seq_max, 1]
     weight *= mask
-    weight_mask = layers.elementwise_mul(seq, weight, axis=0)
-    global_attention = layers.reduce_sum(weight_mask, dim=1)
+    weight_mask = layers.elementwise_mul(seq, weight, axis=0) #[batch_size, seq_max, h]
+    global_attention = layers.reduce_sum(weight_mask, dim=1) #[batch_size, h]
 
     final_attention = layers.concat(
         [global_attention, last], axis=1) #[batch_size, 2*h]
diff --git a/PaddleRec/gnn/reader.py b/PaddleRec/gnn/reader.py
index 82d56e59..effcc242 100644
--- a/PaddleRec/gnn/reader.py
+++ b/PaddleRec/gnn/reader.py
@@ -98,17 +98,20 @@ class Data():
                     cur_batch = cur_bg[i:i + batch_size]
                     yield self.make_data(cur_batch, batch_size)
 
-            #deal with the remaining, discard at most batch_size data
-            if group_remain < batch_size:
+            #deal with the last batch group
+            if group_remain == 0:
                 return
             remain_data = copy.deepcopy(self.input[-group_remain:])
            if train:
                 remain_data = sorted(
                     remain_data, key=lambda x: len(x[0]), reverse=True)
-            for i in range(0, batch_group_size, batch_size):
-                if i + batch_size <= len(remain_data):
+            for i in range(0, group_remain, batch_size):
+                if i + batch_size <= group_remain:
                     cur_batch = remain_data[i:i + batch_size]
                     yield self.make_data(cur_batch, batch_size)
+                else:
+                    cur_batch = remain_data[i:]
+                    yield self.make_data(cur_batch, group_remain % batch_size)
 
         return _reader
diff --git a/PaddleRec/gnn/train.py b/PaddleRec/gnn/train.py
index 1da8dc0b..ac245af5 100644
--- a/PaddleRec/gnn/train.py
+++ b/PaddleRec/gnn/train.py
@@ -71,7 +71,7 @@ def train():
     batch_size = args.batch_size
     items_num = reader.read_config(args.config_path)
 
-    loss, acc, py_reader, feed_datas = network.network(batch_size, items_num, args.hidden_size,
+    loss, acc, py_reader, feed_datas = network.network(items_num, args.hidden_size,
                                                        args.step)
     data_reader = reader.Data(args.train_path, True)
-- 
GitLab
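
A note on the batching change (this commentary and the sketch below are not part of the patch): the reader.py hunk replaces the old behaviour of discarding up to batch_size trailing samples with logic that also yields the final, smaller batch, which is what "use all data" in the commit message refers to. The following minimal Python sketch illustrates that batching pattern; the function and variable names are illustrative only, not the PaddleRec reader API.

    def batch_iter(samples, batch_size, batch_group_size):
        """Sketch of the patched reader loop (illustrative, not the real API):
        walk the samples in groups of batch_group_size, sort each group by
        length so padding inside a batch stays small, and yield the trailing
        partial batch instead of dropping it."""
        assert batch_group_size % batch_size == 0
        group_remain = len(samples) % batch_group_size
        for bg in range(0, len(samples) - group_remain, batch_group_size):
            group = sorted(samples[bg:bg + batch_group_size], key=len, reverse=True)
            for i in range(0, batch_group_size, batch_size):
                yield group[i:i + batch_size]
        if group_remain == 0:
            return
        remain = sorted(samples[-group_remain:], key=len, reverse=True)
        for i in range(0, group_remain, batch_size):
            # the last slice may hold fewer than batch_size samples
            yield remain[i:i + batch_size]

    if __name__ == "__main__":
        sessions = [[0] * (n % 7 + 1) for n in range(45)]  # 45 toy sessions of varying length
        sizes = [len(b) for b in batch_iter(sessions, batch_size=4, batch_group_size=20)]
        print(sizes)  # every session appears exactly once; only the final batch is short

Allowing a short final batch is also why the network.py hunk drops the compile-time batch_size from every layers.data shape and instead recovers the [batch_size, seq_max, hidden_size] layout at run time through layers.shape plus the actual_shape argument of layers.reshape: the graph now accepts a variable batch dimension.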