未验证的提交 995627c5 · 作者:Qiyang Min · 提交者:GitHub

Merge pull request #1142 from velconia/port_py3_text_classification

Port the text_classification and object_detection models to Python 3
...@@ -22,6 +22,7 @@ import xml.etree.ElementTree ...@@ -22,6 +22,7 @@ import xml.etree.ElementTree
import os import os
import time import time
import copy import copy
import six
class Settings(object): class Settings(object):
...@@ -151,7 +152,7 @@ def preprocess(img, bbox_labels, mode, settings): ...@@ -151,7 +152,7 @@ def preprocess(img, bbox_labels, mode, settings):
mirror = int(random.uniform(0, 2)) mirror = int(random.uniform(0, 2))
if mirror == 1: if mirror == 1:
img = img[:, ::-1, :] img = img[:, ::-1, :]
for i in xrange(len(sampled_labels)): for i in six.moves.xrange(len(sampled_labels)):
tmp = sampled_labels[i][1] tmp = sampled_labels[i][1]
sampled_labels[i][1] = 1 - sampled_labels[i][3] sampled_labels[i][1] = 1 - sampled_labels[i][3]
sampled_labels[i][3] = 1 - tmp sampled_labels[i][3] = 1 - tmp
......
...@@ -87,16 +87,16 @@ def train(args, ...@@ -87,16 +87,16 @@ def train(args,
if 'coco' in data_args.dataset: if 'coco' in data_args.dataset:
# learning rate decay in 12, 19 pass, respectively # learning rate decay in 12, 19 pass, respectively
if '2014' in train_file_list: if '2014' in train_file_list:
epocs = 82783 / batch_size epocs = 82783 // batch_size
boundaries = [epocs * 12, epocs * 19] boundaries = [epocs * 12, epocs * 19]
elif '2017' in train_file_list: elif '2017' in train_file_list:
epocs = 118287 / batch_size epocs = 118287 // batch_size
boundaries = [epocs * 12, epocs * 19] boundaries = [epocs * 12, epocs * 19]
values = [ values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25 learning_rate, learning_rate * 0.5, learning_rate * 0.25
] ]
elif 'pascalvoc' in data_args.dataset: elif 'pascalvoc' in data_args.dataset:
epocs = 19200 / batch_size epocs = 19200 // batch_size
boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100] boundaries = [epocs * 40, epocs * 60, epocs * 80, epocs * 100]
values = [ values = [
learning_rate, learning_rate * 0.5, learning_rate * 0.25, learning_rate, learning_rate * 0.5, learning_rate * 0.25,
...@@ -139,7 +139,7 @@ def train(args, ...@@ -139,7 +139,7 @@ def train(args,
model_path = os.path.join(model_save_dir, postfix) model_path = os.path.join(model_save_dir, postfix)
if os.path.isdir(model_path): if os.path.isdir(model_path):
shutil.rmtree(model_path) shutil.rmtree(model_path)
print 'save models to %s' % (model_path) print('save models to %s' % (model_path))
fluid.io.save_persistables(exe, model_path) fluid.io.save_persistables(exe, model_path)
best_map = 0. best_map = 0.
...@@ -193,15 +193,15 @@ def train(args, ...@@ -193,15 +193,15 @@ def train(args,
total_time += end_time - start_time total_time += end_time - start_time
train_avg_loss = np.mean(every_pass_loss) train_avg_loss = np.mean(every_pass_loss)
if devices_num == 1: if devices_num == 1:
print ("kpis train_cost %s" % train_avg_loss) print("kpis train_cost %s" % train_avg_loss)
print ("kpis test_acc %s" % mean_map) print("kpis test_acc %s" % mean_map)
print ("kpis train_speed %s" % (total_time / epoch_idx)) print("kpis train_speed %s" % (total_time / epoch_idx))
else: else:
print ("kpis train_cost_card%s %s" % print("kpis train_cost_card%s %s" %
(devices_num, train_avg_loss)) (devices_num, train_avg_loss))
print ("kpis test_acc_card%s %s" % print("kpis test_acc_card%s %s" %
(devices_num, mean_map)) (devices_num, mean_map))
print ("kpis train_speed_card%s %f" % print("kpis train_speed_card%s %f" %
(devices_num, total_time / epoch_idx)) (devices_num, total_time / epoch_idx))
......
...@@ -16,8 +16,10 @@ ...@@ -16,8 +16,10 @@
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import distutils.util import distutils.util
import numpy as np import numpy as np
import six
from paddle.fluid import core from paddle.fluid import core
...@@ -37,7 +39,7 @@ def print_arguments(args): ...@@ -37,7 +39,7 @@ def print_arguments(args):
:type args: argparse.Namespace :type args: argparse.Namespace
""" """
print("----------- Configuration Arguments -----------") print("----------- Configuration Arguments -----------")
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(six.iteritems(vars(args))):
print("%s: %s" % (arg, value)) print("%s: %s" % (arg, value))
print("------------------------------------------------") print("------------------------------------------------")
......
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -424,7 +425,7 @@ def start_train(train_reader, ...@@ -424,7 +425,7 @@ def start_train(train_reader,
start_exe.run(fluid.default_startup_program()) start_exe.run(fluid.default_startup_program())
exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name) exe = fluid.ParallelExecutor(use_cuda, loss_name=cost.name)
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0 total_acc, total_cost, total_count, avg_cost, avg_acc = 0.0, 0.0, 0.0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
cost_val, acc_val = exe.run(feed=feeder.feed(data), cost_val, acc_val = exe.run(feed=feeder.feed(data),
...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab", ...@@ -452,7 +453,7 @@ def train_net(vocab="./thirdparty/train.vocab",
""" """
w_dict = scdb_word_dict(vocab=vocab) w_dict = scdb_word_dict(vocab=vocab)
test_files = [ "./thirdparty" + os.sep + f for f in test_list] test_files = [ "./thirdparty" + os.sep + f for f in test_list]
train_reader = paddle.batch( train_reader = paddle.batch(
scdb_train_data(train_dir, w_dict), scdb_train_data(train_dir, w_dict),
batch_size = 256) batch_size = 256)
......
...@@ -3,6 +3,7 @@ import contextlib ...@@ -3,6 +3,7 @@ import contextlib
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import numpy as np import numpy as np
import six
import sys import sys
import time import time
import os import os
...@@ -46,8 +47,8 @@ def data2tensor(data, place): ...@@ -46,8 +47,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
...@@ -56,8 +57,8 @@ def data2pred(data, place): ...@@ -56,8 +57,8 @@ def data2pred(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq} return {"words": input_seq}
...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab): ...@@ -79,7 +80,7 @@ def save_dict(word_dict, vocab):
Save dict into file Save dict into file
""" """
with open(vocab, "w") as fout: with open(vocab, "w") as fout:
for k, v in word_dict.iteritems(): for k, v in six.iteritems(word_dict):
outstr = ("%s\t%s\n" % (k, v)).encode("gb18030") outstr = ("%s\t%s\n" % (k, v)).encode("gb18030")
fout.write(outstr) fout.write(outstr)
...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg", ...@@ -163,7 +164,7 @@ def scdb_train_data(train_dir="scdb_data/train_set/corpus.train.seg",
def scdb_test_data(test_file, w_dict): def scdb_test_data(test_file, w_dict):
""" """
test_set=["car", "lbs", "spot", "weibo", test_set=["car", "lbs", "spot", "weibo",
"baby", "toutiao", "3c", "movie", "haogan"] "baby", "toutiao", "3c", "movie", "haogan"]
""" """
return data_reader(test_file, w_dict) return data_reader(test_file, w_dict)
...@@ -422,7 +423,7 @@ def start_train(train_reader, ...@@ -422,7 +423,7 @@ def start_train(train_reader,
feeder = fluid.DataFeeder(feed_list=[data, label], place=place) feeder = fluid.DataFeeder(feed_list=[data, label], place=place)
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(), avg_cost_np, avg_acc_np = exe.run(fluid.default_main_program(),
......
import os import os
import six
import sys import sys
import time import time
import unittest import unittest
...@@ -58,7 +59,7 @@ def train(train_reader, ...@@ -58,7 +59,7 @@ def train(train_reader,
if "CE_MODE_X" in os.environ: if "CE_MODE_X" in os.environ:
fluid.default_startup_program().random_seed = 110 fluid.default_startup_program().random_seed = 110
exe.run(fluid.default_startup_program()) exe.run(fluid.default_startup_program())
for pass_id in xrange(pass_num): for pass_id in six.moves.xrange(pass_num):
pass_start = time.time() pass_start = time.time()
data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0 data_size, data_count, total_acc, total_cost = 0, 0, 0.0, 0.0
for data in train_reader(): for data in train_reader():
......
...@@ -43,8 +43,8 @@ def data2tensor(data, place): ...@@ -43,8 +43,8 @@ def data2tensor(data, place):
""" """
data2tensor data2tensor
""" """
input_seq = to_lodtensor(map(lambda x: x[0], data), place) input_seq = to_lodtensor([x[0] for x in data], place)
y_data = np.array(map(lambda x: x[1], data)).astype("int64") y_data = np.array([x[1] for x in data]).astype("int64")
y_data = y_data.reshape([-1, 1]) y_data = y_data.reshape([-1, 1])
return {"words": input_seq, "label": y_data} return {"words": input_seq, "label": y_data}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册