From 9a1eb4ebd835196b0dcaddff04e96c8a37a153c0 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Sat, 25 Aug 2018 14:21:44 +0000 Subject: [PATCH] Adapt language model to py3 --- fluid/language_model/infer.py | 9 +++++---- fluid/language_model/train.py | 15 ++++++++------- fluid/language_model/train_on_cloud.py | 23 ++++++++++++----------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/fluid/language_model/infer.py b/fluid/language_model/infer.py index a183d548..ad03ef39 100644 --- a/fluid/language_model/infer.py +++ b/fluid/language_model/infer.py @@ -4,9 +4,10 @@ import math import unittest import contextlib import numpy as np +import six +import paddle import paddle.fluid as fluid -import paddle.v2 as paddle import utils @@ -24,8 +25,8 @@ def infer(test_reader, use_cuda, model_path): accum_words = 0 t0 = time.time() for data in test_reader(): - src_wordseq = utils.to_lodtensor(map(lambda x: x[0], data), place) - dst_wordseq = utils.to_lodtensor(map(lambda x: x[1], data), place) + src_wordseq = utils.to_lodtensor([dat[0] for dat in data], place) + dst_wordseq = utils.to_lodtensor([dat[1] for dat in data], place) avg_cost = exe.run( infer_program, feed={"src_wordseq": src_wordseq, @@ -60,6 +61,6 @@ if __name__ == "__main__": vocab, train_reader, test_reader = utils.prepare_data( batch_size=20, buffer_size=1000, word_freq_threshold=0) - for epoch in xrange(start_index, last_index + 1): + for epoch in six.moves.xrange(start_index, last_index + 1): epoch_path = model_dir + "/epoch_" + str(epoch) infer(test_reader=test_reader, use_cuda=True, model_path=epoch_path) diff --git a/fluid/language_model/train.py b/fluid/language_model/train.py index e719f8f1..b5923a90 100644 --- a/fluid/language_model/train.py +++ b/fluid/language_model/train.py @@ -1,6 +1,7 @@ import os import sys import time +import six import numpy as np import math @@ -114,9 +115,9 @@ def train(train_reader, total_time = 0.0 fetch_list = [avg_cost.name] - for pass_idx in xrange(pass_num): + 
for pass_idx in six.moves.xrange(pass_num): epoch_idx = pass_idx + 1 - print "epoch_%d start" % epoch_idx + print("epoch_%d start" % epoch_idx) t0 = time.time() i = 0 @@ -124,9 +125,9 @@ def train(train_reader, for data in train_reader(): i += 1 lod_src_wordseq = utils.to_lodtensor( - map(lambda x: x[0], data), place) + [dat[0] for dat in data], place) lod_dst_wordseq = utils.to_lodtensor( - map(lambda x: x[1], data), place) + [dat[1] for dat in data], place) ret_avg_cost = train_exe.run(feed={ "src_wordseq": lod_src_wordseq, "dst_wordseq": lod_dst_wordseq @@ -135,12 +136,12 @@ def train(train_reader, avg_ppl = np.exp(ret_avg_cost[0]) newest_ppl = np.mean(avg_ppl) if i % 100 == 0: - print "step:%d ppl:%.3f" % (i, newest_ppl) + print("step:%d ppl:%.3f" % (i, newest_ppl)) t1 = time.time() total_time += t1 - t0 - print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i, - total_time / epoch_idx) + print("epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i, + total_time / epoch_idx)) if pass_idx == pass_num - 1 and args.enable_ce: #Note: The following logs are special for CE monitoring. 
diff --git a/fluid/language_model/train_on_cloud.py b/fluid/language_model/train_on_cloud.py index 17318400..41a0c609 100644 --- a/fluid/language_model/train_on_cloud.py +++ b/fluid/language_model/train_on_cloud.py @@ -1,6 +1,7 @@ import os import sys import time +import six import numpy as np import math @@ -49,7 +50,7 @@ def build_dict(min_word_freq=50): word_freq = filter(lambda x: x[1] > min_word_freq, word_freq.items()) word_freq_sorted = sorted(word_freq, key=lambda x: (-x[1], x[0])) words, _ = list(zip(*word_freq_sorted)) - word_idx = dict(zip(words, xrange(len(words)))) + word_idx = dict(zip(words, six.moves.xrange(len(words)))) word_idx['<unk>'] = len(words) return word_idx @@ -212,16 +213,16 @@ def do_train(train_reader, exe.run(fluid.default_startup_program()) total_time = 0.0 - for pass_idx in xrange(pass_num): + for pass_idx in six.moves.xrange(pass_num): epoch_idx = pass_idx + 1 - print "epoch_%d start" % epoch_idx + print("epoch_%d start" % epoch_idx) t0 = time.time() i = 0 for data in train_reader(): i += 1 - lod_src_wordseq = to_lodtensor(map(lambda x: x[0], data), place) - lod_dst_wordseq = to_lodtensor(map(lambda x: x[1], data), place) + lod_src_wordseq = to_lodtensor([dat[0] for dat in data], place) + lod_dst_wordseq = to_lodtensor([dat[1] for dat in data], place) ret_avg_cost = exe.run(fluid.default_main_program(), feed={ "src_wordseq": lod_src_wordseq, "dst_wordseq": lod_dst_wordseq }, fetch_list=[avg_cost], use_program_cache=True) avg_ppl = math.exp(ret_avg_cost[0]) if i % 100 == 0: - print "step:%d ppl:%.3f" % (i, avg_ppl) + print("step:%d ppl:%.3f" % (i, avg_ppl)) t1 = time.time() total_time += t1 - t0 - print "epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i, - total_time / epoch_idx) + print("epoch:%d num_steps:%d time_cost(s):%f" % (epoch_idx, i, - total_time / epoch_idx)) save_dir = "%s/epoch_%d" % (model_dir, epoch_idx) feed_var_names = ["src_wordseq", "dst_wordseq"] @@ -258,13 +259,13 @@ def train(): """ event handler """ if 
isinstance(event, paddle.event.EndIteration): if event.batch_id % 100 == 0: - print "\nPass %d, Batch %d, Cost %f, %s" % ( - event.pass_id, event.batch_id, event.cost, event.metrics) + print("\nPass %d, Batch %d, Cost %f, %s" % ( + event.pass_id, event.batch_id, event.cost, event.metrics)) else: sys.stdout.write('.') sys.stdout.flush() if isinstance(event, paddle.event.EndPass): - print "isinstance(event, paddle.event.EndPass)" + print("isinstance(event, paddle.event.EndPass)") do_train( train_reader=train_reader, -- GitLab