diff --git a/fluid/text_classification/clouds/scdb_parallel_executor.py b/fluid/text_classification/clouds/scdb_parallel_executor.py
index 9d7722e9776d11c591f1ff0bd97b3e295205d300..cc5cd4ee9f9c86a0ed3f7c27e482026d6dbf7a13 100644
--- a/fluid/text_classification/clouds/scdb_parallel_executor.py
+++ b/fluid/text_classification/clouds/scdb_parallel_executor.py
@@ -3,6 +3,7 @@ import contextlib
 import paddle
 import paddle.fluid as fluid
 import numpy as np
+import six
 import sys
 import time
 import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq, "label": y_data}
 
@@ -56,8 +57,8 @@ def data2pred(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq}
 
@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
     Save dict into file
     """
     with open(vocab, "w") as fout:
-        for k, v in word_dict.iteritems():
+        for k, v in six.iteritems(word_dict):
             outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
             fout.write(outstr)
 
@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
 
 def scdb_test_data(test_file, w_dict):
     """
-    test_set=["car", "lbs", "spot", "weibo", 
+    test_set=["car", "lbs", "spot", "weibo",
         "baby", "toutiao", "3c", "movie", "haogan"]
     """
     return data_reader(test_file, w_dict)
@@ -424,7 +425,7 @@ def start_train(train_reader,
     start_exe.run(fluid.default_startup_program())
     exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)
 
-    for pass_id in xrange(pass_num):
+    for pass_id in six.moves.xrange(pass_num):
         total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
         for data in train_reader():
             cost_val, acc_val = exe.run(feed=feeder.feed(data),
@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
     """
     w_dict = scdb_word_dict(vocab=vocab)
     test_files = [ "./thirdparty" + os.sep + f for f in test_list]
-    
+
     train_reader = paddle.batch(
         scdb_train_data(train_dir, w_dict), batch_size = 256)
 
diff --git a/fluid/text_classification/clouds/scdb_single_card.py b/fluid/text_classification/clouds/scdb_single_card.py
index 9cc39269913ab97341e5389b31ad9a5da2e8af51..3690e92776636d8a7c8cef0d9cd4d72414b7a628 100644
--- a/fluid/text_classification/clouds/scdb_single_card.py
+++ b/fluid/text_classification/clouds/scdb_single_card.py
@@ -3,6 +3,7 @@ import contextlib
 import paddle
 import paddle.fluid as fluid
 import numpy as np
+import six
 import sys
 import time
 import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq, "label": y_data}
 
@@ -56,8 +57,8 @@ def data2pred(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq}
 
@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
     Save dict into file
     """
     with open(vocab, "w") as fout:
-        for k, v in word_dict.iteritems():
+        for k, v in six.iteritems(word_dict):
             outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
             fout.write(outstr)
 
@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
 
 def scdb_test_data(test_file, w_dict):
     """
-    test_set=["car", "lbs", "spot", "weibo", 
+    test_set=["car", "lbs", "spot", "weibo",
         "baby", "toutiao", "3c", "movie", "haogan"]
     """
     return data_reader(test_file, w_dict)
@@ -422,7 +423,7 @@ def start_train(train_reader,
 
     feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
     exe.run(fluid.default_startup_program())
-    for pass_id in xrange(pass_num):
+    for pass_id in six.moves.xrange(pass_num):
         data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
         for data in train_reader():
             avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py
index 9078f4788319dbf76677c86eef53445fa1e85c1a..ecf39aa9a5d6a148e25e8606524225393d72806b 100644
--- a/fluid/text_classification/train.py
+++ b/fluid/text_classification/train.py
@@ -1,4 +1,5 @@
 import os
+import six
 import sys
 import time
 import unittest
@@ -58,7 +59,7 @@ def train(train_reader,
     if "CE_MODE_X" in os.environ:
         fluid.default_startup_program().random_seed = 110
     exe.run(fluid.default_startup_program())
-    for pass_id in xrange(pass_num):
+    for pass_id in six.moves.xrange(pass_num):
         pass_start = time.time()
         data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
         for data in train_reader():
diff --git a/fluid/text_classification/utils.py b/fluid/text_classification/utils.py
index 3673946b6f39eade1811dfc4d81c99b0ef9400bb..dce4743d9219aa9ed5ca78b9f690eb1366d92304 100644
--- a/fluid/text_classification/utils.py
+++ b/fluid/text_classification/utils.py
@@ -43,8 +43,8 @@ def data2tensor(data, place):
     """
     data2tensor
     """
-    input_seq = to_lodtensor(map(lambda x: x[0], data), place)
-    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+    input_seq = to_lodtensor([x[0] for x in data], place)
+    y_data = np.array([x[1] for x in data]).astype("int64")
     y_data = y_data.reshape([-1, 1])
     return {"words": input_seq, "label": y_data}
 