From 90bceaafdc0b8845462301ac3301082e7faf5e59 Mon Sep 17 00:00:00 2001
From: Yu Ji
Date: Mon, 29 Jul 2019 06:04:56 +0000
Subject: [PATCH] Replace PyReader with PipeReader

---
 PaddleRec/word2vec/net.py   | 13 +++----------
 PaddleRec/word2vec/train.py | 18 ++++++------------
 2 files changed, 9 insertions(+), 22 deletions(-)

diff --git a/PaddleRec/word2vec/net.py b/PaddleRec/word2vec/net.py
index ab2abbc7..21903f4a 100644
--- a/PaddleRec/word2vec/net.py
+++ b/PaddleRec/word2vec/net.py
@@ -21,21 +21,14 @@ import paddle.fluid as fluid
 
 
 def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
-
-    datas = []
     input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64')
     true_word = fluid.layers.data(name='true_label', shape=[1], dtype='int64')
     neg_word = fluid.layers.data(
         name="neg_label", shape=[neg_num], dtype='int64')
 
-    datas.append(input_word)
-    datas.append(true_word)
-    datas.append(neg_word)
-
-    py_reader = fluid.layers.create_py_reader_by_data(
-        capacity=64, feed_list=datas, name='py_reader', use_double_buffer=True)
+    words = [input_word, true_word, neg_word]
+    pipe_reader = fluid.reader.PipeReader(feed_list=words)
 
-    words = fluid.layers.read_file(py_reader)
     init_width = 0.5 / embedding_size
     input_emb = fluid.layers.embedding(
         input=words[0],
@@ -107,7 +100,7 @@ def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
         fluid.layers.reduce_sum(
             neg_xent, dim=1))
     avg_cost = fluid.layers.reduce_mean(cost)
-    return avg_cost, py_reader
+    return avg_cost, pipe_reader
 
 
 def infer_network(vocab_size, emb_size):
diff --git a/PaddleRec/word2vec/train.py b/PaddleRec/word2vec/train.py
index 430ec132..648ecdf2 100644
--- a/PaddleRec/word2vec/train.py
+++ b/PaddleRec/word2vec/train.py
@@ -91,21 +91,15 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
             if len(result[0]) == batch_size:
                 tensor_result = []
                 for tensor in result:
-                    t = fluid.Tensor()
                     dat = np.array(tensor, dtype='int64')
                     if len(dat.shape) > 2:
                         dat = dat.reshape((dat.shape[0], dat.shape[2]))
                     elif len(dat.shape) == 1:
                         dat = dat.reshape((-1, 1))
-                    t.set(dat, fluid.CPUPlace())
-                    tensor_result.append(t)
-                tt = fluid.Tensor()
+                    tensor_result.append(dat)
                 neg_array = cs.searchsorted(np.random.sample(args.nce_num))
-                neg_array = np.tile(neg_array, batch_size)
-                tt.set(
-                    neg_array.reshape((batch_size, args.nce_num)),
-                    fluid.CPUPlace())
-                tensor_result.append(tt)
+                neg_array = np.tile(neg_array, batch_size).reshape((batch_size, args.nce_num))
+                tensor_result.append(neg_array)
                 yield tensor_result
                 result = [[], []]
 
@@ -115,7 +109,7 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
 
 def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
                weight):
-    py_reader.decorate_tensor_provider(
+    py_reader.decorate_batch_generator(
         convert_python_to_tensor(weight, args.batch_size, reader.train()))
 
     place = fluid.CPUPlace()
@@ -153,9 +147,9 @@ def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
 
             if batch_id % args.print_batch == 0:
                 logger.info(
-                    "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
+                    "TRAIN --> pass: {} batch: {} loss: {}".
                     format(pass_id, batch_id,
-                           loss_val.mean(), py_reader.queue.size()))
+                           loss_val.mean()))
             if args.with_speed:
                 if batch_id % 500 == 0 and batch_id != 0:
                     elapsed = (time.time() - start)
-- 
GitLab
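
For reference, a minimal sketch of how the reader returned by skip_gram_word2vec()
can be driven after this patch, assuming fluid.reader.PipeReader follows the same
non-iterable reader protocol as fluid.io.PyReader (decorate_batch_generator(),
start(), reset(), and fluid.core.EOFException on exhaustion). The
build_batch_generator helper and all sizes below are hypothetical placeholders,
not part of the patch:

    import numpy as np
    import paddle.fluid as fluid

    from net import skip_gram_word2vec


    def build_batch_generator(batch_size, neg_num, dict_size, num_batches):
        # Same callable-returning shape as convert_python_to_tensor() in
        # train.py: the reader is handed a function whose generator yields
        # batches of plain int64 numpy arrays, one per feed variable.
        def __reader__():
            for _ in range(num_batches):
                input_word = np.random.randint(
                    dict_size, size=(batch_size, 1)).astype('int64')
                true_label = np.random.randint(
                    dict_size, size=(batch_size, 1)).astype('int64')
                neg_label = np.random.randint(
                    dict_size, size=(batch_size, neg_num)).astype('int64')
                yield [input_word, true_label, neg_label]

        return __reader__


    loss, reader = skip_gram_word2vec(dict_size=1000, embedding_size=64,
                                      neg_num=5)
    reader.decorate_batch_generator(
        build_batch_generator(batch_size=32, neg_num=5, dict_size=1000,
                              num_batches=100))

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    reader.start()
    try:
        while True:
            loss_val, = exe.run(fluid.default_main_program(),
                                fetch_list=[loss.name])
    except fluid.core.EOFException:
        # Generator exhausted; reset the reader before the next epoch.
        reader.reset()

Since decorate_batch_generator() accepts numpy arrays directly, the manual
fluid.Tensor()/t.set(..., fluid.CPUPlace()) copies that this patch deletes from
convert_python_to_tensor() are no longer needed on the feeding side.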