提交 06820fcb 作者: minqiyang

Port text_classification to Python3

上级 b1837db3
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -424,7 +425,7 @@ def start_train(train_reader, ...@@ -424,7 +425,7 @@ def start_train(train_reader,
start_exe.run(fluid.default_startup_program()) start_exe.run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name) exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0 total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
cost_val, acc_val = exe.run(feed=feeder.feed(data), cost_val, acc_val = exe.run(feed=feeder.feed(data),
...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab", ...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
""" """
w_dict = scdb_word_dict(vocab=vocab) w_dict = scdb_word_dict(vocab=vocab)
test_files = [ "./thirdparty" + os.sep + f for f in test_list] test_files = [ "./thirdparty" + os.sep + f for f in test_list]
train_reader = paddle.batch( train_reader = paddle.batch(
scdb_train_data(train_dir, w_dict), scdb_train_data(train_dir, w_dict),
batch_size = 256) batch_size = 256)
......
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -422,7 +423,7 @@ def start_train(train_reader, ...@@ -422,7 +423,7 @@ def start_train(train_reader,
feeder = fluid.DataFeeder(feed_list=[data, label], place=place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(), avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
......
import os import os
import six
import sys import sys
import time import time
import unittest import unittest
...@@ -58,7 +59,7 @@ def train(train_reader, ...@@ -58,7 +59,7 @@ def train(train_reader,
if "CE_MODE_X" in os.environ: if "CE_MODE_X" in os.environ:
fluid.default_startup_program().random_seed = 110 fluid.default_startup_program().random_seed = 110
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
pass_start = time.time() pass_start = time.time()
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
......
...@@ -43,8 +43,8 @@ def data2tensor(data, place): ...@@ -43,8 +43,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册