未验证 提交 20abbf31 编写于 作者: Z zhengya01 提交者: GitHub

Merge pull request #3 from PaddlePaddle/develop

update
#!/bin/bash
# Continuous-evaluation driver: trains for 50 steps on one card and then on
# four cards, piping each training log into _ce.py for KPI extraction.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
DATASET_PATH=${HOME}/.cache/paddle/dataset/cityscape/

# Run one CE training pass and feed its log to the KPI parser.
#   $1 - value exported as CUDA_VISIBLE_DEVICES
#   $2 - directory passed as --save_weights_path
run_ce() {
    export CUDA_VISIBLE_DEVICES=$1
    FLAGS_benchmark=true python train.py \
        --batch_size=2 \
        --train_crop_size=769 \
        --total_step=50 \
        --save_weights_path=$2 \
        --dataset_path=$DATASET_PATH \
        --enable_ce | python _ce.py
}

cudaid=${deeplabv3plus:=0} # use 0-th card as default
run_ce "$cudaid" output1

cudaid=${deeplabv3plus_m:=0,1,2,3} # use 0,1,2,3 card as default
run_ce "$cudaid" output4
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI trackers for the CE runs; the "card1"/"card4" suffix in each name matches
# the "kpis\t<name>_card<N>\t<value>" lines emitted by the training script.
# NOTE(review): the positional numeric arguments are presumably a tolerance
# threshold and a number of leading records to skip -- confirm against kpi.py.
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.1, 0, actived=True)
train_loss_card1_kpi = CostKpi('train_loss_card1', 0.05, 0)
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.1, 0, actived=True)
train_loss_card4_kpi = CostKpi('train_loss_card4', 0.05, 0)
# Trackers that log_to_ce indexes by their ``name`` attribute.
tracking_kpis = [
each_pass_duration_card1_kpi,
train_loss_card1_kpi,
each_pass_duration_card4_kpi,
train_loss_card4_kpi,
]
def parse_log(log):
    """Yield ``(kpi_name, kpi_value)`` pairs found in a training log.

    A line is treated as a KPI record when, after stripping surrounding
    whitespace, it consists of exactly three tab-separated fields and the
    first field is the literal ``kpis``, for example::

        kpis\\ttrain_loss_card1\\t0.25

    The third field is converted with ``float``; every other line is
    ignored. The split fields of every line (KPI or not) are echoed to
    stdout.

    Args:
        log: the complete training output as a single string.

    Yields:
        Tuples of the KPI name (str) and its value (float).

    Raises:
        ValueError: if the value field of a KPI line is not a valid float.
    """
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        # Anything that is not a three-field "kpis" record is plain log output.
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, kpi_name, raw_value = fields
        yield kpi_name, float(raw_value)
def log_to_ce(log):
    """Record every KPI value found in ``log`` on its tracker and persist it.

    Each ``(name, value)`` pair produced by ``parse_log`` is echoed to
    stdout, added to the tracker in ``tracking_kpis`` whose ``name``
    matches, and that tracker is persisted right after the record is added.

    Args:
        log: the complete training output as a single string.

    Raises:
        KeyError: if the log reports a KPI name that has no tracker in
            ``tracking_kpis``.
    """
    trackers_by_name = {tracker.name: tracker for tracker in tracking_kpis}

    for name, value in parse_log(log):
        print(name, value)
        tracker = trackers_by_name[name]
        tracker.add_record(value)
        tracker.persist()
if __name__ == '__main__':
    # The whole training log is read from stdin before any KPI is recorded.
    log_to_ce(sys.stdin.read())
......@@ -34,6 +34,10 @@ def add_arguments():
add_argument('parallel', bool, False, "using ParallelExecutor.")
add_argument('use_gpu', bool, True, "Whether use GPU or CPU.")
add_argument('num_classes', int, 19, "Number of classes.")
parser.add_argument(
'--enable_ce',
action='store_true',
help='If set, run the task with continuous evaluation logs.')
def load_model():
......@@ -51,7 +55,10 @@ def load_model():
else:
if args.num_classes == 19:
fluid.io.load_params(
exe, dirname=args.init_weights_path, main_program=tp)
exe,
dirname="",
filename=args.init_weights_path,
main_program=tp)
else:
fluid.io.load_vars(
exe, dirname="", filename=args.init_weights_path, vars=myvars)
......@@ -84,6 +91,15 @@ def loss(logit, label):
return loss, label_nignore
def get_cards(args):
    """Return the number of devices used for this run.

    In continuous-evaluation mode (``args.enable_ce``) the count is derived
    from the comma-separated ``CUDA_VISIBLE_DEVICES`` environment variable.
    Otherwise, or when that variable is unset or empty, ``args.num_devices``
    is returned.

    Args:
        args: parsed command-line arguments providing ``enable_ce`` and
            ``num_devices``.

    Returns:
        The device count.
    """
    if not args.enable_ce:
        return args.num_devices

    visible_cards = os.environ.get('CUDA_VISIBLE_DEVICES')
    if not visible_cards:
        # os.environ.get returns None when the variable is unset; calling
        # .split on it would raise AttributeError, so use the configured
        # device count instead.
        return args.num_devices
    return len(visible_cards.split(","))
CityscapeDataset = reader.CityscapeDataset
parser = argparse.ArgumentParser()
......@@ -99,6 +115,13 @@ deeplabv3p = models.deeplabv3p
sp = fluid.Program()
tp = fluid.Program()
# only for ce
if args.enable_ce:
SEED = 102
sp.random_seed = SEED
tp.random_seed = SEED
crop_size = args.train_crop_size
batch_size = args.batch_size
image_shape = [crop_size, crop_size]
......@@ -155,7 +178,13 @@ if args.parallel:
batches = dataset.get_batch_generator(batch_size, total_step)
total_time = 0.0
epoch_idx = 0
train_loss = 0
for i, imgs, labels, names in batches:
epoch_idx += 1
begin_time = time.time()
prev_start_time = time.time()
if args.parallel:
retv = exe_p.run(fetch_list=[pred.name, loss_mean.name],
......@@ -167,11 +196,21 @@ for i, imgs, labels, names in batches:
'label': labels},
fetch_list=[pred, loss_mean])
end_time = time.time()
total_time += end_time - begin_time
if i % 100 == 0:
print("Model is saved to", args.save_weights_path)
save_model()
print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format(
i, np.mean(retv[1]), end_time - prev_start_time))
# only for ce
train_loss = np.mean(retv[1])
if args.enable_ce:
gpu_num = get_cards(args)
print("kpis\teach_pass_duration_card%s\t%s" %
(gpu_num, total_time / epoch_idx))
print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, train_loss))
print("Training done. Model is saved to", args.save_weights_path)
save_model()
......@@ -7,6 +7,7 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py
BATCH_SIZE=224
# Default to cards 0-3; written without spaces so the exported
# CUDA_VISIBLE_DEVICES value is a plain comma-separated id list.
cudaid=${object_detection_cudaid_m:=0,1,2,3}
export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py
......@@ -242,7 +242,7 @@ def train(args):
device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n')
train_batch_size = args.batch_size / device_num
test_batch_size = 8
test_batch_size = 16
if not args.enable_ce:
train_reader = paddle.batch(
reader.train(), batch_size=train_batch_size, drop_last=True)
......
......@@ -68,6 +68,7 @@ class GeneratorEnqueuer(object):
try:
task()
except Exception:
traceback.print_exc()
self._stop_event.set()
break
else:
......@@ -75,6 +76,7 @@ class GeneratorEnqueuer(object):
try:
task()
except Exception:
traceback.print_exc()
self._stop_event.set()
break
......
......@@ -176,10 +176,17 @@ def coco(settings, file_list, mode, batch_size, shuffle):
if mode == 'train' and shuffle:
np.random.shuffle(images)
batch_out = []
# Images live in a per-year, per-split sub-directory of settings.data_dir.
# The split is chosen from the `mode` argument (the 2014 branch previously
# referenced an undefined name `model`).
if '2014' in file_list:
    sub_dir = "train2014" if mode == "train" else "val2014"
elif '2017' in file_list:
    sub_dir = "train2017" if mode == "train" else "val2017"
data_dir = os.path.join(settings.data_dir, sub_dir)
for image in images:
image_name = image['file_name']
image_path = os.path.join(settings.data_dir, image_name)
image_path = os.path.join(data_dir, image_name)
if not os.path.exists(image_path):
raise ValueError("%s is not exist, you should specify "
"data path correctly." % image_path)
im = Image.open(image_path)
if im.mode == 'L':
im = im.convert('RGB')
......@@ -242,7 +249,9 @@ def pascalvoc(settings, file_list, mode, batch_size, shuffle):
image_path, label_path = image.split()
image_path = os.path.join(settings.data_dir, image_path)
label_path = os.path.join(settings.data_dir, label_path)
if not os.path.exists(image_path):
raise ValueError("%s is not exist, you should specify "
"data path correctly." % image_path)
im = Image.open(image_path)
if im.mode == 'L':
im = im.convert('RGB')
......@@ -295,7 +304,6 @@ def train(settings,
max_queue=24,
enable_ce=False):
file_list = os.path.join(settings.data_dir, file_list)
if 'coco' in settings.dataset:
generator = coco(settings, file_list, "train", batch_size, shuffle)
else:
......@@ -341,6 +349,9 @@ def test(settings, file_list, batch_size):
def infer(settings, image_path):
def reader():
if not os.path.exists(image_path):
raise ValueError("%s is not exist, you should specify "
"data path correctly." % image_path)
img = Image.open(image_path)
# Single-channel ('L') images are converted to RGB. The conversion must be
# applied to `img`; no `im` is defined in the visible part of this reader.
if img.mode == 'L':
    img = img.convert('RGB')
......
......@@ -52,7 +52,7 @@ def parse_args():
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
......@@ -61,6 +61,7 @@ def load_reverse_dict(dict_path):
return dict((idx, line.strip().split("\t")[0])
for idx, line in enumerate(open(dict_path, "r").readlines()))
def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data]
cur_len = 0
......@@ -76,7 +77,6 @@ def to_lodtensor(data, place):
return res
def infer(args):
word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
mention = fluid.layers.data(
......@@ -108,8 +108,8 @@ def infer(args):
profiler.reset_profiler()
iters = 0
for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place)
mention = to_lodtensor(map(lambda x: x[1], data), place)
word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
start = time.time()
crf_decode = exe.run(inference_program,
......@@ -122,12 +122,12 @@ def infer(args):
np_data = np.array(crf_decode[0])
word_count = 0
assert len(data) == len(lod_info) - 1
for sen_index in xrange(len(data)):
for sen_index in range(len(data)):
assert len(data[sen_index][0]) == lod_info[
sen_index + 1] - lod_info[sen_index]
word_index = 0
for tag_index in xrange(lod_info[sen_index],
lod_info[sen_index + 1]):
for tag_index in range(lod_info[sen_index],
lod_info[sen_index + 1]):
word = str(data[sen_index][0][word_index])
gold_tag = label_reverse_dict[data[sen_index][2][
word_index]]
......
......@@ -65,7 +65,7 @@ def parse_args():
def print_arguments(args):
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()):
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
......@@ -220,9 +220,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list):
chunk_evaluator.reset()
for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place)
mention = to_lodtensor(map(lambda x: x[1], data), place)
target = to_lodtensor(map(lambda x: x[2], data), place)
word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = exe.run(
inference_program,
feed={"word": word,
......@@ -232,8 +232,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
number_infer = np.array(result_list[0])
number_label = np.array(result_list[1])
number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer[0], number_label[0],
number_correct[0])
chunk_evaluator.update(number_infer[0].astype('int64'),
number_label[0].astype('int64'),
number_correct[0].astype('int64'))
return chunk_evaluator.eval()
......@@ -241,9 +242,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list):
chunk_evaluator.reset()
for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place)
mention = to_lodtensor(map(lambda x: x[1], data), place)
target = to_lodtensor(map(lambda x: x[2], data), place)
word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = test_exe.run(
fetch_list=cur_fetch_list,
feed={"word": word,
......@@ -252,8 +253,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
number_infer = np.array(result_list[0])
number_label = np.array(result_list[1])
number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer.sum(),
number_label.sum(), number_correct.sum())
chunk_evaluator.update(number_infer.sum().astype('int64'),
number_label.sum().astype('int64'),
number_correct.sum().astype('int64'))
return chunk_evaluator.eval()
......@@ -270,11 +272,6 @@ def main(args):
crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
inference_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
(precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks) = fluid.layers.chunk_eval(
input=crf_decode,
......@@ -282,6 +279,11 @@ def main(args):
chunk_scheme="IOB",
num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0)))
inference_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
chunk_evaluator = fluid.metrics.ChunkEvaluator()
train_reader = paddle.batch(
......@@ -312,7 +314,7 @@ def main(args):
test_exe = exe
batch_id = 0
for pass_id in xrange(args.num_passes):
for pass_id in range(args.num_passes):
chunk_evaluator.reset()
train_reader_iter = train_reader()
start_time = time.time()
......@@ -326,9 +328,9 @@ def main(args):
],
feed=feeder.feed(cur_batch))
chunk_evaluator.update(
np.array(nums_infer).sum(),
np.array(nums_label).sum(),
np.array(nums_correct).sum())
np.array(nums_infer).sum().astype("int64"),
np.array(nums_label).sum().astype("int64"),
np.array(nums_correct).sum().astype("int64"))
cost_list = np.array(cost)
batch_id += 1
except StopIteration:
......
......@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared in models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True)
train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.05, 0, actived=True)
tracking_kpis = [
train_cost_kpi,
......
......@@ -3,6 +3,7 @@
import os
import sys
#sys.path.insert(0, os.environ['ceroot'])
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should be shared in models in some way!!!!
......
......@@ -23,6 +23,7 @@ import json
import logging
import numpy as np
from collections import Counter
import io
class BRCDataset(object):
......@@ -67,7 +68,7 @@ class BRCDataset(object):
Args:
data_path: the data file to load
"""
with open(data_path) as fin:
with io.open(data_path, 'r', encoding='utf-8') as fin:
data_set = []
for lidx, line in enumerate(fin):
sample = json.loads(line.strip())
......
......@@ -22,6 +22,7 @@ import os
import random
import json
import six
import multiprocessing
import paddle
import paddle.fluid as fluid
......
......@@ -469,7 +469,7 @@ def train_loop(exe,
# For faster executor
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.use_experimental_executor = True
# exec_strategy.num_iteration_per_drop_scope = 5
exec_strategy.num_iteration_per_drop_scope = int(args.fetch_steps)
build_strategy = fluid.BuildStrategy()
# Since the token number differs among devices, customize gradient scale to
# use token average cost among multi-devices. and the gradient scale is
......
......@@ -89,7 +89,7 @@ def train(train_reader,
def train_net():
word_dict, train_reader, test_reader = utils.prepare_data(
"imdb", self_dict=False, batch_size=4, buf_size=50000)
"imdb", self_dict=False, batch_size=128, buf_size=50000)
if sys.argv[1] == "bow":
train(
......
......@@ -6,9 +6,9 @@ export OMP_NUM_THREADS=1
cudaid=${text_matching_on_quora:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py
cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py
......@@ -7,11 +7,11 @@ from kpi import CostKpi
from kpi import DurationKpi
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.05, 0, actived=True)
train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.2, 0)
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.08, 0, actived=True)
train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.08, 0)
train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0)
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.05, 0, actived=True)
train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.2, 0)
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.08, 0, actived=True)
train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.08, 0)
train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0)
tracking_kpis = [
......
......@@ -34,6 +34,7 @@ parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--model_name', type=str, default='cdssmNet', help="Which model to train")
parser.add_argument('--config', type=str, default='cdssm_base', help="The global config setting")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
parser.add_argument('--epoch_num', type=int, help='Number of epoch')
DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache/paddle/dataset')
......@@ -241,6 +242,9 @@ def main():
args = parser.parse_args()
global_config = configs.__dict__[args.config]()
# --epoch_num has no default, so None means "keep the value from the config".
if args.epoch_num is not None:
    global_config.epoch_num = args.epoch_num
print("net_name: ", args.model_name)
net = models.__dict__[args.model_name](global_config)
......
......@@ -33,7 +33,7 @@ class CNNEncoder(object):
""" cnn-encoder"""
def __init__(self,
param_name="cnn.w",
param_name="cnn",
win_size=3,
ksize=128,
act='tanh',
......@@ -51,13 +51,15 @@ class CNNEncoder(object):
filter_size=self.win_size,
act=self.act,
pool_type=self.pool_type,
param_attr=str(self.param_name))
param_attr=self.param_name + ".param",
bias_attr=self.param_name + ".bias")
class GrnnEncoder(object):
""" grnn-encoder """
def __init__(self, param_name="grnn.w", hidden_size=128):
def __init__(self, param_name="grnn", hidden_size=128):
self.param_name = param_name
self.hidden_size = hidden_size
......@@ -65,13 +67,15 @@ class GrnnEncoder(object):
fc0 = nn.fc(
input=emb,
size=self.hidden_size * 3,
param_attr=str(str(self.param_name) + "_fc")
)
param_attr=self.param_name + "_fc.w",
bias_attr=False)
gru_h = nn.dynamic_gru(
input=fc0,
size=self.hidden_size,
is_reverse=False,
param_attr=str(self.param_name))
param_attr=self.param_name + ".param",
bias_attr=self.param_name + ".bias")
return nn.sequence_pool(input=gru_h, pool_type='max')
......@@ -139,17 +143,17 @@ class MultiviewSimnet(object):
# lookup embedding for each slot
q_embs = [
nn.embedding(
input=query, size=self.emb_shape, param_attr="emb.w")
input=query, size=self.emb_shape, param_attr="emb")
for query in q_slots
]
pt_embs = [
nn.embedding(
input=title, size=self.emb_shape, param_attr="emb.w")
input=title, size=self.emb_shape, param_attr="emb")
for title in pt_slots
]
nt_embs = [
nn.embedding(
input=title, size=self.emb_shape, param_attr="emb.w")
input=title, size=self.emb_shape, param_attr="emb")
for title in nt_slots
]
......@@ -170,9 +174,9 @@ class MultiviewSimnet(object):
nt_concat = nn.concat(nt_encodes)
# projection of hidden layer
q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w')
pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w')
nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w')
q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b')
pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
# cosine of hidden layers
cos_pos = nn.cos_sim(q_hid, pt_hid)
......@@ -213,12 +217,12 @@ class MultiviewSimnet(object):
# lookup embedding for each slot
q_embs = [
nn.embedding(
input=query, size=self.emb_shape, param_attr="emb.w")
input=query, size=self.emb_shape, param_attr="emb")
for query in q_slots
]
pt_embs = [
nn.embedding(
input=title, size=self.emb_shape, param_attr="emb.w")
input=title, size=self.emb_shape, param_attr="emb")
for title in pt_slots
]
# encode each embedding field with encoder
......@@ -232,8 +236,8 @@ class MultiviewSimnet(object):
q_concat = nn.concat(q_encodes)
pt_concat = nn.concat(pt_encodes)
# projection of hidden layer
q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w')
pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w')
q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b')
pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
# cosine of hidden layers
cos = nn.cos_sim(q_hid, pt_hid)
return cos
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册