提交 90bceaaf 作者: Yu Ji

Replace PyReader with PipeReader

上级 4ed7b251
......@@ -21,21 +21,14 @@ import paddle.fluid as fluid
def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
datas = []
input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64')
true_word = fluid.layers.data(name='true_label', shape=[1], dtype='int64')
neg_word = fluid.layers.data(
name="neg_label", shape=[neg_num], dtype='int64')
datas.append(input_word)
datas.append(true_word)
datas.append(neg_word)
py_reader = fluid.layers.create_py_reader_by_data(
capacity=64, feed_list=datas, name='py_reader', use_double_buffer=True)
words = [input_word, true_word, neg_word]
pipe_reader = fluid.reader.PipeReader(feed_list=words)
words = fluid.layers.read_file(py_reader)
init_width = 0.5 / embedding_size
input_emb = fluid.layers.embedding(
input=words[0],
......@@ -107,7 +100,7 @@ def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
fluid.layers.reduce_sum(
neg_xent, dim=1))
avg_cost = fluid.layers.reduce_mean(cost)
return avg_cost, py_reader
return avg_cost, pipe_reader
def infer_network(vocab_size, emb_size):
......
......@@ -91,21 +91,15 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
if len(result[0]) == batch_size:
tensor_result = []
for tensor in result:
t = fluid.Tensor()
dat = np.array(tensor, dtype='int64')
if len(dat.shape) > 2:
dat = dat.reshape((dat.shape[0], dat.shape[2]))
elif len(dat.shape) == 1:
dat = dat.reshape((-1, 1))
t.set(dat, fluid.CPUPlace())
tensor_result.append(t)
tt = fluid.Tensor()
tensor_result.append(dat)
neg_array = cs.searchsorted(np.random.sample(args.nce_num))
neg_array = np.tile(neg_array, batch_size)
tt.set(
neg_array.reshape((batch_size, args.nce_num)),
fluid.CPUPlace())
tensor_result.append(tt)
neg_array = np.tile(neg_array, batch_size).reshape((batch_size, args.nce_num))
tensor_result.append(neg_array)
yield tensor_result
result = [[], []]
......@@ -115,7 +109,7 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
weight):
py_reader.decorate_tensor_provider(
py_reader.decorate_batch_generator(
convert_python_to_tensor(weight, args.batch_size, reader.train()))
place = fluid.CPUPlace()
......@@ -153,9 +147,9 @@ def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
if batch_id % args.print_batch == 0:
logger.info(
"TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
"TRAIN --> pass: {} batch: {} loss: {}".
format(pass_id, batch_id,
loss_val.mean(), py_reader.queue.size()))
loss_val.mean()))
if args.with_speed:
if batch_id % 500 == 0 and batch_id != 0:
elapsed = (time.time() - start)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册