Unverified commit 995627c5, authored by Qiyang Min, committed by GitHub

Merge pull request #1142 from velconia/port_py3_text_classification

Port text_classification model and object_detection to Python3
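
For readers skimming the diff, the port below applies a small set of recurring Python 2/3 compatibility patterns: xrange becomes six.moves.xrange, dict.iteritems() becomes six.iteritems(), the print statement becomes the print() function, integer division uses //, and map(lambda ...) calls become list comprehensions. A minimal, self-contained sketch of those patterns (the names and values here are illustrative, not taken from the models):

from __future__ import division, print_function

import six

counts = {"pos": 3, "neg": 2}               # toy data, not from the models

# dict.iteritems() no longer exists in Python 3; six.iteritems() works in both.
for label, n in six.iteritems(counts):
    print("%s: %s" % (label, n))            # print statement -> print() function

# xrange() no longer exists in Python 3; six.moves.xrange() maps to range() there.
for i in six.moves.xrange(3):
    pass

steps_per_epoch = 82783 // 32               # "/" is true division in Python 3; "//" keeps ints

# map() returns a lazy iterator in Python 3; list comprehensions are eager and explicit.
labels = [pair[1] for pair in [("a", 0), ("b", 1)]]
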
@@ -22,6 +22,7 @@ import xml.etree.ElementTree
import os
import time
import copy
import six
class Settings(object):
@@ -151,7 +152,7 @@ def preprocess(img, bbox_labels, mode, settings):
mirror = int(random.uniform(0, 2))
if mirror == 1:
img = img[:, ::-1, :]
for i in xrange(len(sampled_labels)):
for i in six.moves.xrange(len(sampled_labels)):
tmp = sampled_labels[i][1]
sampled_labels[i][1] = 1 - sampled_labels[i][3]
sampled_labels[i][3] = 1 - tmp
......
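
For context on the hunk above: when the image is mirrored left-right, the normalized x-coordinates of every sampled box have to be reflected as well, and the loop index now comes from six.moves.xrange so the same code runs on Python 2 and 3. A standalone sketch, assuming each label is [class, xmin, ymin, xmax, ymax] with coordinates normalized to [0, 1] (that layout is an assumption, not stated in the diff):

import six

def flip_labels_horizontally(sampled_labels):
    # Assumed label layout: [class, xmin, ymin, xmax, ymax], coordinates in [0, 1].
    for i in six.moves.xrange(len(sampled_labels)):
        xmin, xmax = sampled_labels[i][1], sampled_labels[i][3]
        sampled_labels[i][1] = 1 - xmax     # new xmin mirrors the old xmax
        sampled_labels[i][3] = 1 - xmin     # new xmax mirrors the old xmin
    return sampled_labels

# A box spanning x in [0.2, 0.5] becomes [0.5, 0.8] after the horizontal flip.
print(flip_labels_horizontally([[7, 0.2, 0.1, 0.5, 0.4]]))
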
@@ -87,16 +87,16 @@ def train(args,
if 'coco' in data_args.dataset:
# learning rate decay in 12, 19 pass, respectively
if '2014' in train_file_list:
epocs = 82783 / batch_size
epocs = 82783 // batch_size
boundaries = [epocs * 12, epocs * 19]
elif '2017' in train_file_list:
epocs = 118287 / batch_size
epocs = 118287 // batch_size
boundaries = [epocs * 12, epocs * 19]
values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25
]
elif 'pascalvoc' in data_args.dataset:
epocs = 19200 / batch_size
epocs = 19200 // batch_size
boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25,
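
The switch from / to // matters because / is true division in Python 3 and would turn these step boundaries into floats. A small sketch using the COCO2014 image count from the hunk above (the batch size and learning rate are illustrative, not the model's configured values):

batch_size = 32                              # illustrative value, not from the config
epocs = 82783 // batch_size                  # integer steps per epoch, same result on 2 and 3
boundaries = [epocs * 12, epocs * 19]        # decay the learning rate after epochs 12 and 19
values = [0.001, 0.001 * 0.5, 0.001 * 0.25]  # illustrative base learning rate and decay factors
print(epocs, boundaries, values)
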
@@ -139,7 +139,7 @@ def train(args,
model_path = os.path.join(model_save_dir, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
print 'save models to %s' % (model_path)
print('save models to %s' % (model_path))
fluid.io.save_persistables(exe, model_path)
best_map = 0.
@@ -193,15 +193,15 @@ def train(args,
total_time += end_time - start_time
train_avg_loss = np.mean(every_pass_loss)
if devices_num == 1:
print ("kpis train_cost %s" % train_avg_loss)
print ("kpis test_acc %s" % mean_map)
print ("kpis train_speed %s" % (total_time / epoch_idx))
print("kpis train_cost %s" % train_avg_loss)
print("kpis test_acc %s" % mean_map)
print("kpis train_speed %s" % (total_time / epoch_idx))
else:
print ("kpis train_cost_card%s %s" %
print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss))
print ("kpis test_acc_card%s %s" %
print("kpis test_acc_card%s %s" %
(devices_num, mean_map))
print ("kpis train_speed_card%s %f" %
print("kpis train_speed_card%s %f" %
(devices_num, total_time / epoch_idx))
......
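
With print as a function (enabled on Python 2 via from __future__ import print_function), the per-card KPI lines above parenthesize cleanly on both interpreters. A runnable sketch of that logging pattern, with made-up metric values:

from __future__ import print_function

devices_num = 4                              # illustrative values, not real training metrics
train_avg_loss, mean_map = 1.23, 0.71
total_time, epoch_idx = 360.0, 2

if devices_num == 1:
    print("kpis train_cost %s" % train_avg_loss)
    print("kpis test_acc %s" % mean_map)
    print("kpis train_speed %s" % (total_time / epoch_idx))
else:
    print("kpis train_cost_card%s %s" % (devices_num, train_avg_loss))
    print("kpis test_acc_card%s %s" % (devices_num, mean_map))
    print("kpis train_speed_card%s %f" % (devices_num, total_time / epoch_idx))
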
@@ -16,8 +16,10 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
import six
from paddle.fluid import core
@@ -37,7 +39,7 @@ def print_arguments(args):
:type args: argparse.Namespace
"""
print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()):
for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value))
print("------------------------------------------------")
......
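
The print_arguments() change swaps dict.iteritems(), which was removed in Python 3, for six.iteritems(). A self-contained sketch of the same pattern (the --batch_size flag is only an example, not one of the model's actual arguments):

import argparse
import six

def print_arguments(args):
    print("----------- Configuration Arguments -----------")
    for arg, value in sorted(six.iteritems(vars(args))):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")

parser = argparse.ArgumentParser()
parser.add_argument("--batch_size", type=int, default=32)   # example flag only
print_arguments(parser.parse_args([]))
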
@@ -3,6 +3,7 @@ import contextlib
import paddle
import paddle.fluid as fluid
import numpy as np
import six
import sys
import time
import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data}
@@ -56,8 +57,8 @@ def data2pred(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq}
@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file
"""
with open(vocab, "w") as fout:
for k, v in word_dict.iteritems():
for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr)
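
One thing the diff leaves untouched in save_dict(): outstr is a gb18030-encoded bytes object, and writing bytes to a file opened in text mode ("w") raises TypeError on Python 3. A hedged sketch of a 2/3-safe variant that keeps the gb18030 output by opening the file in binary mode (this is an observation and an illustration, not a change made in this pull request):

import six

def save_dict(word_dict, vocab):
    """Save dict into file, one "word<TAB>id" line per entry, gb18030-encoded."""
    with open(vocab, "wb") as fout:          # binary mode accepts bytes on both 2 and 3
        for k, v in six.iteritems(word_dict):
            fout.write(("%s\t%s\n" % (k, v)).encode("gb18030"))

save_dict({u"\u4f60\u597d": 0, u"\u4e16\u754c": 1}, "vocab.txt")   # toy vocabulary
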
@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict):
"""
test_set=["car", "lbs", "spot", "weibo",
test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"]
"""
return data_reader(test_file, w_dict)
@@ -424,7 +425,7 @@ def start_train(train_reader,
start_exe.run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)
for pass_id in xrange(pass_num):
for pass_id in six.moves.xrange(pass_num):
total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
for data in train_reader():
cost_val, acc_val = exe.run(feed=feeder.feed(data),
@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
"""
w_dict = scdb_word_dict(vocab=vocab)
test_files = [ "./thirdparty" + os.sep + f for f in test_list]
train_reader = paddle.batch(
scdb_train_data(train_dir, w_dict),
batch_size = 256)
......
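
train_net() above builds its reader with paddle.batch(scdb_train_data(train_dir, w_dict), batch_size=256), which groups sample-level records into lists of 256. A pure-Python sketch of that batching idea (a conceptual stand-in, not PaddlePaddle's implementation; toy_reader is invented for the example):

def toy_reader():
    for i in range(10):                      # stands in for scdb_train_data(...)
        yield ([i, i + 1], i % 2)            # (word_ids, label) sample

def batch(reader, batch_size):
    def batched_reader():
        buf = []
        for sample in reader():
            buf.append(sample)
            if len(buf) == batch_size:
                yield buf
                buf = []
        if buf:                              # emit the trailing partial batch
            yield buf
    return batched_reader

for minibatch in batch(toy_reader, 4)():
    print(len(minibatch))                    # 4, 4, 2
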
@@ -3,6 +3,7 @@ import contextlib
import paddle
import paddle.fluid as fluid
import numpy as np
import six
import sys
import time
import os
@@ -46,8 +47,8 @@ def data2tensor(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data}
@@ -56,8 +57,8 @@ def data2pred(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq}
@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file
"""
with open(vocab, "w") as fout:
for k, v in word_dict.iteritems():
for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr)
@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict):
"""
test_set=["car", "lbs", "spot", "weibo",
test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"]
"""
return data_reader(test_file, w_dict)
@@ -422,7 +423,7 @@ def start_train(train_reader,
feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num):
for pass_id in six.moves.xrange(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader():
avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
......
import os
import six
import sys
import time
import unittest
@@ -58,7 +59,7 @@ def train(train_reader,
if "CE_MODE_X" in os.environ:
fluid.default_startup_program().random_seed = 110
exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num):
for pass_id in six.moves.xrange(pass_num):
pass_start = time.time()
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader():
......
@@ -43,8 +43,8 @@ def data2tensor(data, place):
"""
data2tensor
"""
input_seq = to_lodtensor(map(lambda x: x[0], data), place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64")
input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data}
......