Commit 90bceaaf authored by Y Yu Ji

Replace PyReader with PipeReader

Parent 4ed7b251
...@@ -21,21 +21,14 @@ import paddle.fluid as fluid
 def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
-    datas = []
     input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64')
     true_word = fluid.layers.data(name='true_label', shape=[1], dtype='int64')
     neg_word = fluid.layers.data(
         name="neg_label", shape=[neg_num], dtype='int64')
-    datas.append(input_word)
-    datas.append(true_word)
-    datas.append(neg_word)
-    py_reader = fluid.layers.create_py_reader_by_data(
-        capacity=64, feed_list=datas, name='py_reader', use_double_buffer=True)
-    words = fluid.layers.read_file(py_reader)
+    words = [input_word, true_word, neg_word]
+    pipe_reader = fluid.reader.PipeReader(feed_list=words)
     init_width = 0.5 / embedding_size
     input_emb = fluid.layers.embedding(
         input=words[0],
...@@ -107,7 +100,7 @@ def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
         fluid.layers.reduce_sum(
             neg_xent, dim=1))
     avg_cost = fluid.layers.reduce_mean(cost)
-    return avg_cost, py_reader
+    return avg_cost, pipe_reader

 def infer_network(vocab_size, emb_size):
......
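With this change the caller receives the reader directly from skip_gram_word2vec instead of looking up a named py_reader layer. A minimal wiring sketch, assuming the returned reader exposes the decorate_batch_generator hook used in train.py further down; batch_generator is a hypothetical stand-in for any Python generator that yields one list of numpy int64 arrays per batch, ordered as [input_word, true_word, neg_word]:

import paddle.fluid as fluid

# Sketch only: skip_gram_word2vec is the function edited above, and
# batch_generator is a hypothetical generator of numpy array lists.
main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    loss, reader = skip_gram_word2vec(
        dict_size=10000, embedding_size=64, is_sparse=True, neg_num=5)

# Mirrors the decorate_batch_generator call introduced in train.py below.
reader.decorate_batch_generator(batch_generator)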
...@@ -91,21 +91,15 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
             if len(result[0]) == batch_size:
                 tensor_result = []
                 for tensor in result:
-                    t = fluid.Tensor()
                     dat = np.array(tensor, dtype='int64')
                     if len(dat.shape) > 2:
                         dat = dat.reshape((dat.shape[0], dat.shape[2]))
                     elif len(dat.shape) == 1:
                         dat = dat.reshape((-1, 1))
-                    t.set(dat, fluid.CPUPlace())
-                    tensor_result.append(t)
-                tt = fluid.Tensor()
+                    tensor_result.append(dat)
                 neg_array = cs.searchsorted(np.random.sample(args.nce_num))
-                neg_array = np.tile(neg_array, batch_size)
-                tt.set(
-                    neg_array.reshape((batch_size, args.nce_num)),
-                    fluid.CPUPlace())
-                tensor_result.append(tt)
+                neg_array = np.tile(neg_array, batch_size).reshape((batch_size, args.nce_num))
+                tensor_result.append(neg_array)
                 yield tensor_result
                 result = [[], []]
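The generator now yields plain numpy arrays instead of fluid.Tensor objects, and the negative-sample block is folded into two lines. A standalone numpy sketch of the same shape logic, with made-up sizes and weights standing in for args.nce_num, batch_size, and the cumulative word-frequency table cs:

import numpy as np

# Made-up example values; in train.py these come from args and the vocabulary.
batch_size, nce_num = 4, 5
weight = np.array([0.4, 0.3, 0.2, 0.1])   # per-word sampling weights
cs = weight.cumsum() / weight.sum()       # cumulative distribution, like `cs` above

# Map nce_num uniform draws to word ids with searchsorted, then repeat the
# same negatives for every row of the batch and reshape to (batch_size, nce_num).
neg_array = cs.searchsorted(np.random.sample(nce_num))
neg_array = np.tile(neg_array, batch_size).reshape((batch_size, nce_num))

print(neg_array.shape)  # (4, 5): one row of negative word ids per example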
...@@ -115,7 +109,7 @@ def convert_python_to_tensor(weight, batch_size, sample_reader):
 def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
                weight):
-    py_reader.decorate_tensor_provider(
+    py_reader.decorate_batch_generator(
         convert_python_to_tensor(weight, args.batch_size, reader.train()))

     place = fluid.CPUPlace()
...@@ -153,9 +147,9 @@ def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
             if batch_id % args.print_batch == 0:
                 logger.info(
-                    "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
+                    "TRAIN --> pass: {} batch: {} loss: {}".
                     format(pass_id, batch_id,
-                           loss_val.mean(), py_reader.queue.size()))
+                           loss_val.mean()))
             if args.with_speed:
                 if batch_id % 500 == 0 and batch_id != 0:
                     elapsed = (time.time() - start)
......
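For context, train_loop drives the decorated reader roughly as in the sketch below; this assumes the reader keeps the PyReader-style start()/reset() interface implied by the unchanged surrounding code, and py_reader, train_program, and loss stand for the objects set up earlier in train_loop:

import paddle.fluid as fluid

# Placeholders for args.num_passes and args.print_batch.
num_passes, print_batch = 5, 100

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

for pass_id in range(num_passes):
    py_reader.start()                  # start pulling batches from the generator
    batch_id = 0
    try:
        while True:
            loss_val = exe.run(program=train_program, fetch_list=[loss])[0]
            if batch_id % print_batch == 0:
                print("TRAIN --> pass: {} batch: {} loss: {}".format(
                    pass_id, batch_id, loss_val.mean()))
            batch_id += 1
    except fluid.core.EOFException:
        py_reader.reset()              # generator exhausted; reset for next pass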