From 06820fcbe5c4d11f2f47f9211bb64a8b39100a45 Mon Sep 17 00:00:00 2001 From: minqiyang Date: Mon, 13 Aug 2018 20:01:15 +0800 Subject: [PATCH] Port text_classification to Python3 --- .../clouds/scdb_parallel_executor.py | 17 +++++++++-------- .../clouds/scdb_single_card.py | 15 ++++++++------- fluid/text_classification/train.py | 3 ++- fluid/text_classification/utils.py | 4 ++-- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/fluid/text_classification/clouds/scdb_parallel_executor.py b/fluid/text_classification/clouds/scdb_parallel_executor.py index 9d7722e9..cc5cd4ee 100644 --- a/fluid/text_classification/clouds/scdb_parallel_executor.py +++ b/fluid/text_classification/clouds/scdb_parallel_executor.py @@ -3,6 +3,7 @@ import contextlib import paddle import paddle.fluid as fluid import numpy as np +import six import sys import time import os @@ -46,8 +47,8 @@ def data2tensor(data, place): """ data2tensor """ - input_seq = to_lodtensor(map(lambda x: x[0], data), place) - y_data = np.array(map(lambda x: x[1], data)).astype("int64") + input_seq = to_lodtensor([x[0] for x in data], place) + y_data = np.array([x[1] for x in data]).astype("int64") y_data = y_data.reshape([-1, 1]) return {"words": input_seq, "label": y_data} @@ -56,8 +57,8 @@ def data2pred(data, place): """ data2tensor """ - input_seq = to_lodtensor(map(lambda x: x[0], data), place) - y_data = np.array(map(lambda x: x[1], data)).astype("int64") + input_seq = to_lodtensor([x[0] for x in data], place) + y_data = np.array([x[1] for x in data]).astype("int64") y_data = y_data.reshape([-1, 1]) return {"words": input_seq} @@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): Save dict into file """ with open(vocab, "w") as fout: - for k, v in word_dict.iteritems(): + for k, v in six.iteritems(word_dict): outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") fout.write(outstr) @@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", def scdb_test_data(test_file, w_dict): """ - test_set=["car", "lbs", "spot", "weibo", + test_set=["car", "lbs", "spot", "weibo", "baby", "toutiao", "3c", "movie", "haogan"] """ return data_reader(test_file, w_dict) @@ -424,7 +425,7 @@ def start_train(train_reader, start_exe.run(fluid.default_startup_program()) exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name) - for pass_id in xrange(pass_num): + for pass_id in six.moves.xrange(pass_num): total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0 for data in train_reader(): cost_val, acc_val = exe.run(feed=feeder.feed(data), @@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab", """ w_dict = scdb_word_dict(vocab=vocab) test_files = [ "./thirdparty" + os.sep + f for f in test_list] - + train_reader = paddle.batch( scdb_train_data(train_dir, w_dict), batch_size = 256) diff --git a/fluid/text_classification/clouds/scdb_single_card.py b/fluid/text_classification/clouds/scdb_single_card.py index 9cc39269..3690e927 100644 --- a/fluid/text_classification/clouds/scdb_single_card.py +++ b/fluid/text_classification/clouds/scdb_single_card.py @@ -3,6 +3,7 @@ import contextlib import paddle import paddle.fluid as fluid import numpy as np +import six import sys import time import os @@ -46,8 +47,8 @@ def data2tensor(data, place): """ data2tensor """ - input_seq = to_lodtensor(map(lambda x: x[0], data), place) - y_data = np.array(map(lambda x: x[1], data)).astype("int64") + input_seq = to_lodtensor([x[0] for x in data], place) + y_data = np.array([x[1] for x in data]).astype("int64") y_data = y_data.reshape([-1, 1]) return {"words": input_seq, "label": y_data} @@ -56,8 +57,8 @@ def data2pred(data, place): """ data2tensor """ - input_seq = to_lodtensor(map(lambda x: x[0], data), place) - y_data = np.array(map(lambda x: x[1], data)).astype("int64") + input_seq = to_lodtensor([x[0] for x in data], place) + y_data = np.array([x[1] for x in data]).astype("int64") y_data = y_data.reshape([-1, 1]) return {"words": input_seq} @@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): Save dict into file """ with open(vocab, "w") as fout: - for k, v in word_dict.iteritems(): + for k, v in six.iteritems(word_dict): outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") fout.write(outstr) @@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", def scdb_test_data(test_file, w_dict): """ - test_set=["car", "lbs", "spot", "weibo", + test_set=["car", "lbs", "spot", "weibo", "baby", "toutiao", "3c", "movie", "haogan"] """ return data_reader(test_file, w_dict) @@ -422,7 +423,7 @@ def start_train(train_reader, feeder = fluid.DataFeeder(feed_list=[data, label], place=place) exe.run(fluid.default_startup_program()) - for pass_id in xrange(pass_num): + for pass_id in six.moves.xrange(pass_num): data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 for data in train_reader(): avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(), diff --git a/fluid/text_classification/train.py b/fluid/text_classification/train.py index 9078f478..ecf39aa9 100644 --- a/fluid/text_classification/train.py +++ b/fluid/text_classification/train.py @@ -1,4 +1,5 @@ import os +import six import sys import time import unittest @@ -58,7 +59,7 @@ def train(train_reader, if "CE_MODE_X" in os.environ: fluid.default_startup_program().random_seed = 110 exe.run(fluid.default_startup_program()) - for pass_id in xrange(pass_num): + for pass_id in six.moves.xrange(pass_num): pass_start = time.time() data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 for data in train_reader(): diff --git a/fluid/text_classification/utils.py b/fluid/text_classification/utils.py index 3673946b..dce4743d 100644 --- a/fluid/text_classification/utils.py +++ b/fluid/text_classification/utils.py @@ -43,8 +43,8 @@ def data2tensor(data, place): """ data2tensor """ - input_seq = to_lodtensor(map(lambda x: x[0], data), place) - y_data = np.array(map(lambda x: x[1], data)).astype("int64") + input_seq = to_lodtensor([x[0] for x in data], place) + y_data = np.array([x[1] for x in data]).astype("int64") y_data = y_data.reshape([-1, 1]) return {"words": input_seq, "label": y_data} -- GitLab