未验证 提交 4f86e9d0 编写于 作者: Yibing Liu 提交者: GitHub

Merge pull request #1189 from PaddlePaddle/lang_model_py3

Adapt the language model code to Python 3
......@@ -4,9 +4,10 @@ import math
import unittest
import contextlib
import numpy as np
import six
import paddle
import paddle.fluid as fluid
import paddle.v2 as paddle
import utils
......@@ -24,8 +25,8 @@ def infer(test_reader, use_cuda, model_path):
accum_words = 0
t0 = time.time()
for data in test_reader():
src_wordseq = utils.to_lodtensor(map(lambda x: x[0], data), place)
dst_wordseq = utils.to_lodtensor(map(lambda x: x[1], data), place)
src_wordseq = utils.to_lodtensor([dat[0] for dat in data], place)
dst_wordseq = utils.to_lodtensor([dat[1] for dat in data], place)
avg_cost = exe.run(
infer_program,
feed={"src_wordseq": src_wordseq,
......@@ -60,6 +61,6 @@ if __name__ == "__main__":
vocab, train_reader, test_reader = utils.prepare_data(
batch_size=20, buffer_size=1000, word_freq_threshold=0)
for epoch in xrange(start_index, last_index + 1):
for epoch in six.moves.xrange(start_index, last_index + 1):
epoch_path = model_dir + "/epoch_" + str(epoch)
infer(test_reader=test_reader, use_cuda=True, model_path=epoch_path)
import os
import sys
import time
import six
import numpy as np
import math
......@@ -114,9 +115,9 @@ def train(train_reader,
total_time = 0.0
fetch_list = [avg_cost.name]
for pass_idx in xrange(pass_num):
for pass_idx in six.moves.xrange(pass_num):
epoch_idx = pass_idx + 1
print "epoch_%d start" % epoch_idx
print("epoch_%d start" % epoch_idx)
t0 = time.time()
i = 0
......@@ -124,9 +125,9 @@ def train(train_reader,
for data in train_reader():
i += 1
lod_src_wordseq = utils.to_lodtensor(
map(lambda x: x[0], data), place)
[dat[0] for dat in data], place)
lod_dst_wordseq = utils.to_lodtensor(
map(lambda x: x[1], data), place)
[dat[1] for dat in data], place)
ret_avg_cost = train_exe.run(feed={
"src_wordseq": lod_src_wordseq,
"dst_wordseq": lod_dst_wordseq
......@@ -135,12 +136,12 @@ def train(train_reader,
avg_ppl = np.exp(ret_avg_cost[0])
newest_ppl = np.mean(avg_ppl)
if i % 100 == 0:
print "step:%d ppl:%.3f" % (i, newest_ppl)
print("step:%d ppl:%.3f" % (i, newest_ppl))
t1 = time.time()
total_time += t1 - t0
print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
total_time / epoch_idx)
print("epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
total_time / epoch_idx))
if pass_idx == pass_num - 1 and args.enable_ce:
#Note: The following logs are special for CE monitoring.
......
import os
import sys
import time
import six
import numpy as np
import math
......@@ -49,7 +50,7 @@ def build_dict(min_word_freq=50):
word_freq = filter(lambda x: x[1] > min_word_freq, word_freq.items())
word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0]))
words, _ = list(zip(*word_freq_sorted))
word_idx = dict(zip(words, xrange(len(words))))
word_idx = dict(zip(words, six.moves.xrange(len(words))))
word_idx['<unk>'] = len(words)
return word_idx
......@@ -212,16 +213,16 @@ def do_train(train_reader,
exe.run(fluid.default_startup_program())
total_time = 0.0
for pass_idx in xrange(pass_num):
for pass_idx in six.moves.xrange(pass_num):
epoch_idx = pass_idx + 1
print "epoch_%d start" % epoch_idx
print("epoch_%d start" % epoch_idx)
t0 = time.time()
i = 0
for data in train_reader():
i += 1
lod_src_wordseq = to_lodtensor(map(lambda x: x[0], data), place)
lod_dst_wordseq = to_lodtensor(map(lambda x: x[1], data), place)
lod_src_wordseq = to_lodtensor([dat[0] for dat in data], place)
lod_dst_wordseq = to_lodtensor([dat[1] for dat in data], place)
ret_avg_cost = exe.run(fluid.default_main_program(),
feed={
"src_wordseq": lod_src_wordseq,
......@@ -231,12 +232,12 @@ def do_train(train_reader,
use_program_cache=True)
avg_ppl = math.exp(ret_avg_cost[0])
if i % 100 == 0:
print "step:%d ppl:%.3f" % (i, avg_ppl)
print("step:%d ppl:%.3f" % (i, avg_ppl))
t1 = time.time()
total_time += t1 - t0
print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
total_time / epoch_idx)
print("epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i,
total_time / epoch_idx))
save_dir = "%s/epoch_%d" % (model_dir, epoch_idx)
feed_var_names = ["src_wordseq", "dst_wordseq"]
......@@ -258,13 +259,13 @@ def train():
""" event handler """
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 100 == 0:
print "\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
print("\nPass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics))
else:
sys.stdout.write('.')
sys.stdout.flush()
if isinstance(event, paddle.event.EndPass):
print "isinstance(event, paddle.event.EndPass)"
print("isinstance(event, paddle.event.EndPass)")
do_train(
train_reader=train_reader,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册