未验证 提交 20abbf31 编写于 作者: Z zhengya01 提交者: GitHub

Merge pull request #3 from PaddlePaddle/develop

update
#!/bin/bash
# Continuous-evaluation driver: trains for a few steps on one card and then on
# four cards, piping each training log into _ce.py.
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
DATASET_PATH=${HOME}/.cache/paddle/dataset/cityscape/

# Run one 50-step training job and feed its stdout to _ce.py.
#   $1 - card list exported as CUDA_VISIBLE_DEVICES
#   $2 - directory passed as --save_weights_path
run_ce_training() {
    cudaid=$1
    export CUDA_VISIBLE_DEVICES=$cudaid
    FLAGS_benchmark=true python train.py \
        --batch_size=2 \
        --train_crop_size=769 \
        --total_step=50 \
        --save_weights_path="$2" \
        --dataset_path=$DATASET_PATH \
        --enable_ce | python _ce.py
}

# ${name:=default} also assigns the default to the variable when it is unset
# or empty, so the card lists can be overridden from the calling environment.
run_ce_training "${deeplabv3plus:=0}" output1             # use 0-th card as default
run_ce_training "${deeplabv3plus_m:=0,1,2,3}" output4     # use 0,1,2,3 card as default
# this file is only used for continuous evaluation test!
import os
import sys
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
# KPI trackers for the one-card ("card1") and four-card ("card4") runs; the
# names must match the ones printed on the "kpis" log lines.
# NOTE(review): the positional numeric arguments are interpreted by the
# external `kpi` module -- confirm their meaning there before tuning them.
each_pass_duration_card1_kpi = DurationKpi(
    'each_pass_duration_card1', 0.1, 0, actived=True)
train_loss_card1_kpi = CostKpi(
    'train_loss_card1', 0.05, 0)
each_pass_duration_card4_kpi = DurationKpi(
    'each_pass_duration_card4', 0.1, 0, actived=True)
train_loss_card4_kpi = CostKpi(
    'train_loss_card4', 0.05, 0)

# Every KPI that log_to_ce() is allowed to update.
tracking_kpis = [
    each_pass_duration_card1_kpi,
    train_loss_card1_kpi,
    each_pass_duration_card4_kpi,
    train_loss_card4_kpi,
]
def parse_log(log):
    '''
    Yield the KPI records contained in a training log.

    A record is a line that, after stripping surrounding whitespace, has
    exactly three tab-separated fields and whose first field is "kpis",
    for example:
    "
    kpis\ttrain_loss_card1\t1.0
    kpis\teach_pass_duration_card1\t0.5
    "
    Every other line is ignored.  The split fields of each line, matching
    or not, are echoed to stdout.

    Args:
        log: the whole log as a single string.

    Yields:
        (kpi_name, kpi_value) tuples, with kpi_value converted to float.

    Raises:
        ValueError: if the third field of a record is not a valid float.
    '''
    for raw_line in log.split('\n'):
        fields = raw_line.strip().split('\t')
        print(fields)
        if len(fields) != 3 or fields[0] != 'kpis':
            continue
        _, kpi_name, raw_value = fields
        yield kpi_name, float(raw_value)
def log_to_ce(log):
    """Record every KPI found in ``log`` on its tracker and persist it.

    Args:
        log: the whole training log as a single string.

    Raises:
        KeyError: if the log reports a KPI name that is not in
            ``tracking_kpis``.
    """
    trackers_by_name = {tracker.name: tracker for tracker in tracking_kpis}
    for kpi_name, kpi_value in parse_log(log):
        print(kpi_name, kpi_value)
        tracker = trackers_by_name[kpi_name]
        tracker.add_record(kpi_value)
        # Persist after each record, as soon as it has been added.
        tracker.persist()
if __name__ == '__main__':
    # The training log arrives on stdin (the script sits at the end of a
    # shell pipe); read it in full and hand it to the KPI recorder.
    log_to_ce(sys.stdin.read())
...@@ -34,6 +34,10 @@ def add_arguments(): ...@@ -34,6 +34,10 @@ def add_arguments():
add_argument('parallel', bool, False, "using ParallelExecutor.") add_argument('parallel', bool, False, "using ParallelExecutor.")
add_argument('use_gpu', bool, True, "Whether use GPU or CPU.") add_argument('use_gpu', bool, True, "Whether use GPU or CPU.")
add_argument('num_classes', int, 19, "Number of classes.") add_argument('num_classes', int, 19, "Number of classes.")
parser.add_argument(
'--enable_ce',
action='store_true',
help='If set, run the task with continuous evaluation logs.')
def load_model(): def load_model():
...@@ -51,7 +55,10 @@ def load_model(): ...@@ -51,7 +55,10 @@ def load_model():
else: else:
if args.num_classes == 19: if args.num_classes == 19:
fluid.io.load_params( fluid.io.load_params(
exe, dirname=args.init_weights_path, main_program=tp) exe,
dirname="",
filename=args.init_weights_path,
main_program=tp)
else: else:
fluid.io.load_vars( fluid.io.load_vars(
exe, dirname="", filename=args.init_weights_path, vars=myvars) exe, dirname="", filename=args.init_weights_path, vars=myvars)
...@@ -84,6 +91,15 @@ def loss(logit, label): ...@@ -84,6 +91,15 @@ def loss(logit, label):
return loss, label_nignore return loss, label_nignore
def get_cards(args):
    """Return the number of cards used for this run.

    Outside continuous-evaluation mode this is simply ``args.num_devices``.
    With ``args.enable_ce`` set, it is the number of comma-separated entries
    in the ``CUDA_VISIBLE_DEVICES`` environment variable.

    Raises:
        AttributeError: in CE mode when ``CUDA_VISIBLE_DEVICES`` is not set.
    """
    if not args.enable_ce:
        return args.num_devices
    visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES')
    device_ids = visible_devices.split(",")
    return len(device_ids)
CityscapeDataset = reader.CityscapeDataset CityscapeDataset = reader.CityscapeDataset
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
...@@ -99,6 +115,13 @@ deeplabv3p = models.deeplabv3p ...@@ -99,6 +115,13 @@ deeplabv3p = models.deeplabv3p
sp = fluid.Program() sp = fluid.Program()
tp = fluid.Program() tp = fluid.Program()
# only for ce
if args.enable_ce:
SEED = 102
sp.random_seed = SEED
tp.random_seed = SEED
crop_size = args.train_crop_size crop_size = args.train_crop_size
batch_size = args.batch_size batch_size = args.batch_size
image_shape = [crop_size, crop_size] image_shape = [crop_size, crop_size]
...@@ -155,7 +178,13 @@ if args.parallel: ...@@ -155,7 +178,13 @@ if args.parallel:
batches = dataset.get_batch_generator(batch_size, total_step) batches = dataset.get_batch_generator(batch_size, total_step)
total_time = 0.0
epoch_idx = 0
train_loss = 0
for i, imgs, labels, names in batches: for i, imgs, labels, names in batches:
epoch_idx += 1
begin_time = time.time()
prev_start_time = time.time() prev_start_time = time.time()
if args.parallel: if args.parallel:
retv = exe_p.run(fetch_list=[pred.name, loss_mean.name], retv = exe_p.run(fetch_list=[pred.name, loss_mean.name],
...@@ -167,11 +196,21 @@ for i, imgs, labels, names in batches: ...@@ -167,11 +196,21 @@ for i, imgs, labels, names in batches:
'label': labels}, 'label': labels},
fetch_list=[pred, loss_mean]) fetch_list=[pred, loss_mean])
end_time = time.time() end_time = time.time()
total_time += end_time - begin_time
if i % 100 == 0: if i % 100 == 0:
print("Model is saved to", args.save_weights_path) print("Model is saved to", args.save_weights_path)
save_model() save_model()
print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format( print("step {:d}, loss: {:.6f}, step_time_cost: {:.3f}".format(
i, np.mean(retv[1]), end_time - prev_start_time)) i, np.mean(retv[1]), end_time - prev_start_time))
# only for ce
train_loss = np.mean(retv[1])
if args.enable_ce:
gpu_num = get_cards(args)
print("kpis\teach_pass_duration_card%s\t%s" %
(gpu_num, total_time / epoch_idx))
print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, train_loss))
print("Training done. Model is saved to", args.save_weights_path) print("Training done. Model is saved to", args.save_weights_path)
save_model() save_model()
...@@ -7,6 +7,7 @@ cudaid=${object_detection_cudaid:=0} ...@@ -7,6 +7,7 @@ cudaid=${object_detection_cudaid:=0}
export CUDA_VISIBLE_DEVICES=$cudaid export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py
BATCH_SIZE=224
cudaid=${object_detection_cudaid_m:=0, 1, 2, 3} cudaid=${object_detection_cudaid_m:=0, 1, 2, 3}
export CUDA_VISIBLE_DEVICES=$cudaid export CUDA_VISIBLE_DEVICES=$cudaid
python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py python train.py --batch_size=${BATCH_SIZE} --num_epochs=5 --enable_ce=True --lr_strategy=cosine_decay | python _ce.py
...@@ -242,7 +242,7 @@ def train(args): ...@@ -242,7 +242,7 @@ def train(args):
device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n') device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n')
train_batch_size = args.batch_size / device_num train_batch_size = args.batch_size / device_num
test_batch_size = 8 test_batch_size = 16
if not args.enable_ce: if not args.enable_ce:
train_reader = paddle.batch( train_reader = paddle.batch(
reader.train(), batch_size=train_batch_size, drop_last=True) reader.train(), batch_size=train_batch_size, drop_last=True)
......
...@@ -68,6 +68,7 @@ class GeneratorEnqueuer(object): ...@@ -68,6 +68,7 @@ class GeneratorEnqueuer(object):
try: try:
task() task()
except Exception: except Exception:
traceback.print_exc()
self._stop_event.set() self._stop_event.set()
break break
else: else:
...@@ -75,6 +76,7 @@ class GeneratorEnqueuer(object): ...@@ -75,6 +76,7 @@ class GeneratorEnqueuer(object):
try: try:
task() task()
except Exception: except Exception:
traceback.print_exc()
self._stop_event.set() self._stop_event.set()
break break
......
...@@ -176,10 +176,17 @@ def coco(settings, file_list, mode, batch_size, shuffle): ...@@ -176,10 +176,17 @@ def coco(settings, file_list, mode, batch_size, shuffle):
if mode == 'train' and shuffle: if mode == 'train' and shuffle:
np.random.shuffle(images) np.random.shuffle(images)
batch_out = [] batch_out = []
# Choose the image sub-directory from the dataset year found in the
# annotation file path and from the split being read.
# NOTE(review): if the path contains neither '2014' nor '2017', sub_dir is
# never assigned and the join below fails -- confirm that cannot happen.
if '2014' in file_list:
    # `mode` is the split argument of coco(); the previous spelling `model`
    # is not a name defined in this function and raised NameError.
    sub_dir = "train2014" if mode == "train" else "val2014"
elif '2017' in file_list:
    sub_dir = "train2017" if mode == "train" else "val2017"
data_dir = os.path.join(settings.data_dir, sub_dir)
for image in images: for image in images:
image_name = image['file_name'] image_name = image['file_name']
image_path = os.path.join(settings.data_dir, image_name) image_path = os.path.join(data_dir, image_name)
if not os.path.exists(image_path):
raise ValueError("%s is not exist, you should specify "
"data path correctly." % image_path)
im = Image.open(image_path) im = Image.open(image_path)
if im.mode == 'L': if im.mode == 'L':
im = im.convert('RGB') im = im.convert('RGB')
...@@ -242,7 +249,9 @@ def pascalvoc(settings, file_list, mode, batch_size, shuffle): ...@@ -242,7 +249,9 @@ def pascalvoc(settings, file_list, mode, batch_size, shuffle):
image_path, label_path = image.split() image_path, label_path = image.split()
image_path = os.path.join(settings.data_dir, image_path) image_path = os.path.join(settings.data_dir, image_path)
label_path = os.path.join(settings.data_dir, label_path) label_path = os.path.join(settings.data_dir, label_path)
if not os.path.exists(image_path):
raise ValueError("%s is not exist, you should specify "
"data path correctly." % image_path)
im = Image.open(image_path) im = Image.open(image_path)
if im.mode == 'L': if im.mode == 'L':
im = im.convert('RGB') im = im.convert('RGB')
...@@ -295,7 +304,6 @@ def train(settings, ...@@ -295,7 +304,6 @@ def train(settings,
max_queue=24, max_queue=24,
enable_ce=False): enable_ce=False):
file_list = os.path.join(settings.data_dir, file_list) file_list = os.path.join(settings.data_dir, file_list)
if 'coco' in settings.dataset: if 'coco' in settings.dataset:
generator = coco(settings, file_list, "train", batch_size, shuffle) generator = coco(settings, file_list, "train", batch_size, shuffle)
else: else:
...@@ -341,6 +349,9 @@ def test(settings, file_list, batch_size): ...@@ -341,6 +349,9 @@ def test(settings, file_list, batch_size):
def infer(settings, image_path): def infer(settings, image_path):
def reader(): def reader():
if not os.path.exists(image_path):
raise ValueError("%s is not exist, you should specify "
"data path correctly." % image_path)
img = Image.open(image_path) img = Image.open(image_path)
if img.mode == 'L': if img.mode == 'L':
img = im.convert('RGB') img = im.convert('RGB')
......
...@@ -52,7 +52,7 @@ def parse_args(): ...@@ -52,7 +52,7 @@ def parse_args():
def print_arguments(args): def print_arguments(args):
print('----------- Configuration Arguments -----------') print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value)) print('%s: %s' % (arg, value))
print('------------------------------------------------') print('------------------------------------------------')
...@@ -61,6 +61,7 @@ def load_reverse_dict(dict_path): ...@@ -61,6 +61,7 @@ def load_reverse_dict(dict_path):
return dict((idx, line.strip().split("\t")[0]) return dict((idx, line.strip().split("\t")[0])
for idx, line in enumerate(open(dict_path, "r").readlines())) for idx, line in enumerate(open(dict_path, "r").readlines()))
def to_lodtensor(data, place): def to_lodtensor(data, place):
seq_lens = [len(seq) for seq in data] seq_lens = [len(seq) for seq in data]
cur_len = 0 cur_len = 0
...@@ -76,7 +77,6 @@ def to_lodtensor(data, place): ...@@ -76,7 +77,6 @@ def to_lodtensor(data, place):
return res return res
def infer(args): def infer(args):
word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1) word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
mention = fluid.layers.data( mention = fluid.layers.data(
...@@ -108,8 +108,8 @@ def infer(args): ...@@ -108,8 +108,8 @@ def infer(args):
profiler.reset_profiler() profiler.reset_profiler()
iters = 0 iters = 0
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(map(lambda x: x[1], data), place) mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
start = time.time() start = time.time()
crf_decode = exe.run(inference_program, crf_decode = exe.run(inference_program,
...@@ -122,12 +122,12 @@ def infer(args): ...@@ -122,12 +122,12 @@ def infer(args):
np_data = np.array(crf_decode[0]) np_data = np.array(crf_decode[0])
word_count = 0 word_count = 0
assert len(data) == len(lod_info) - 1 assert len(data) == len(lod_info) - 1
for sen_index in xrange(len(data)): for sen_index in range(len(data)):
assert len(data[sen_index][0]) == lod_info[ assert len(data[sen_index][0]) == lod_info[
sen_index + 1] - lod_info[sen_index] sen_index + 1] - lod_info[sen_index]
word_index = 0 word_index = 0
for tag_index in xrange(lod_info[sen_index], for tag_index in range(lod_info[sen_index],
lod_info[sen_index + 1]): lod_info[sen_index + 1]):
word = str(data[sen_index][0][word_index]) word = str(data[sen_index][0][word_index])
gold_tag = label_reverse_dict[data[sen_index][2][ gold_tag = label_reverse_dict[data[sen_index][2][
word_index]] word_index]]
......
...@@ -65,7 +65,7 @@ def parse_args(): ...@@ -65,7 +65,7 @@ def parse_args():
def print_arguments(args): def print_arguments(args):
print('----------- Configuration Arguments -----------') print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).iteritems()): for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value)) print('%s: %s' % (arg, value))
print('------------------------------------------------') print('------------------------------------------------')
...@@ -220,9 +220,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place, ...@@ -220,9 +220,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list): cur_fetch_list):
chunk_evaluator.reset() chunk_evaluator.reset()
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(map(lambda x: x[1], data), place) mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(map(lambda x: x[2], data), place) target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = exe.run( result_list = exe.run(
inference_program, inference_program,
feed={"word": word, feed={"word": word,
...@@ -232,8 +232,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place, ...@@ -232,8 +232,9 @@ def test2(exe, chunk_evaluator, inference_program, test_data, place,
number_infer = np.array(result_list[0]) number_infer = np.array(result_list[0])
number_label = np.array(result_list[1]) number_label = np.array(result_list[1])
number_correct = np.array(result_list[2]) number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer[0], number_label[0], chunk_evaluator.update(number_infer[0].astype('int64'),
number_correct[0]) number_label[0].astype('int64'),
number_correct[0].astype('int64'))
return chunk_evaluator.eval() return chunk_evaluator.eval()
...@@ -241,9 +242,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place, ...@@ -241,9 +242,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
cur_fetch_list): cur_fetch_list):
chunk_evaluator.reset() chunk_evaluator.reset()
for data in test_data(): for data in test_data():
word = to_lodtensor(map(lambda x: x[0], data), place) word = to_lodtensor(list(map(lambda x: x[0], data)), place)
mention = to_lodtensor(map(lambda x: x[1], data), place) mention = to_lodtensor(list(map(lambda x: x[1], data)), place)
target = to_lodtensor(map(lambda x: x[2], data), place) target = to_lodtensor(list(map(lambda x: x[2], data)), place)
result_list = test_exe.run( result_list = test_exe.run(
fetch_list=cur_fetch_list, fetch_list=cur_fetch_list,
feed={"word": word, feed={"word": word,
...@@ -252,8 +253,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place, ...@@ -252,8 +253,9 @@ def test(test_exe, chunk_evaluator, inference_program, test_data, place,
number_infer = np.array(result_list[0]) number_infer = np.array(result_list[0])
number_label = np.array(result_list[1]) number_label = np.array(result_list[1])
number_correct = np.array(result_list[2]) number_correct = np.array(result_list[2])
chunk_evaluator.update(number_infer.sum(), chunk_evaluator.update(number_infer.sum().astype('int64'),
number_label.sum(), number_correct.sum()) number_label.sum().astype('int64'),
number_correct.sum().astype('int64'))
return chunk_evaluator.eval() return chunk_evaluator.eval()
...@@ -270,11 +272,6 @@ def main(args): ...@@ -270,11 +272,6 @@ def main(args):
crf_decode = fluid.layers.crf_decoding( crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw')) input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
inference_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
(precision, recall, f1_score, num_infer_chunks, num_label_chunks, (precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks) = fluid.layers.chunk_eval( num_correct_chunks) = fluid.layers.chunk_eval(
input=crf_decode, input=crf_decode,
...@@ -282,6 +279,11 @@ def main(args): ...@@ -282,6 +279,11 @@ def main(args):
chunk_scheme="IOB", chunk_scheme="IOB",
num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0))) num_chunk_types=int(math.ceil((args.label_dict_len - 1) / 2.0)))
inference_program = fluid.default_main_program().clone(for_test=True)
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
chunk_evaluator = fluid.metrics.ChunkEvaluator() chunk_evaluator = fluid.metrics.ChunkEvaluator()
train_reader = paddle.batch( train_reader = paddle.batch(
...@@ -312,7 +314,7 @@ def main(args): ...@@ -312,7 +314,7 @@ def main(args):
test_exe = exe test_exe = exe
batch_id = 0 batch_id = 0
for pass_id in xrange(args.num_passes): for pass_id in range(args.num_passes):
chunk_evaluator.reset() chunk_evaluator.reset()
train_reader_iter = train_reader() train_reader_iter = train_reader()
start_time = time.time() start_time = time.time()
...@@ -326,9 +328,9 @@ def main(args): ...@@ -326,9 +328,9 @@ def main(args):
], ],
feed=feeder.feed(cur_batch)) feed=feeder.feed(cur_batch))
chunk_evaluator.update( chunk_evaluator.update(
np.array(nums_infer).sum(), np.array(nums_infer).sum().astype("int64"),
np.array(nums_label).sum(), np.array(nums_label).sum().astype("int64"),
np.array(nums_correct).sum()) np.array(nums_correct).sum().astype("int64"))
cost_list = np.array(cost) cost_list = np.array(cost)
batch_id += 1 batch_id += 1
except StopIteration: except StopIteration:
......
...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi ...@@ -7,8 +7,8 @@ from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!! #### NOTE kpi.py should shared in models in some way!!!!
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True) train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.05, actived=True) train_duration_kpi = DurationKpi('train_duration', 0.05, 0, actived=True)
tracking_kpis = [ tracking_kpis = [
train_cost_kpi, train_cost_kpi,
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
import os import os
import sys import sys
#sys.path.insert(0, os.environ['ceroot']) #sys.path.insert(0, os.environ['ceroot'])
sys.path.append(os.environ['ceroot'])
from kpi import CostKpi, DurationKpi, AccKpi from kpi import CostKpi, DurationKpi, AccKpi
#### NOTE kpi.py should shared in models in some way!!!! #### NOTE kpi.py should shared in models in some way!!!!
......
...@@ -23,6 +23,7 @@ import json ...@@ -23,6 +23,7 @@ import json
import logging import logging
import numpy as np import numpy as np
from collections import Counter from collections import Counter
import io
class BRCDataset(object): class BRCDataset(object):
...@@ -67,7 +68,7 @@ class BRCDataset(object): ...@@ -67,7 +68,7 @@ class BRCDataset(object):
Args: Args:
data_path: the data file to load data_path: the data file to load
""" """
with open(data_path) as fin: with io.open(data_path, 'r', encoding='utf-8') as fin:
data_set = [] data_set = []
for lidx, line in enumerate(fin): for lidx, line in enumerate(fin):
sample = json.loads(line.strip()) sample = json.loads(line.strip())
......
...@@ -22,6 +22,7 @@ import os ...@@ -22,6 +22,7 @@ import os
import random import random
import json import json
import six import six
import multiprocessing
import paddle import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
......
...@@ -469,7 +469,7 @@ def train_loop(exe, ...@@ -469,7 +469,7 @@ def train_loop(exe,
# For faster executor # For faster executor
exec_strategy = fluid.ExecutionStrategy() exec_strategy = fluid.ExecutionStrategy()
exec_strategy.use_experimental_executor = True exec_strategy.use_experimental_executor = True
# exec_strategy.num_iteration_per_drop_scope = 5 exec_strategy.num_iteration_per_drop_scope = int(args.fetch_steps)
build_strategy = fluid.BuildStrategy() build_strategy = fluid.BuildStrategy()
# Since the token number differs among devices, customize gradient scale to # Since the token number differs among devices, customize gradient scale to
# use token average cost among multi-devices. and the gradient scale is # use token average cost among multi-devices. and the gradient scale is
......
...@@ -89,7 +89,7 @@ def train(train_reader, ...@@ -89,7 +89,7 @@ def train(train_reader,
def train_net(): def train_net():
word_dict, train_reader, test_reader = utils.prepare_data( word_dict, train_reader, test_reader = utils.prepare_data(
"imdb", self_dict=False, batch_size=4, buf_size=50000) "imdb", self_dict=False, batch_size=128, buf_size=50000)
if sys.argv[1] == "bow": if sys.argv[1] == "bow":
train( train(
......
...@@ -6,9 +6,9 @@ export OMP_NUM_THREADS=1 ...@@ -6,9 +6,9 @@ export OMP_NUM_THREADS=1
cudaid=${text_matching_on_quora:=0} # use 0-th card as default cudaid=${text_matching_on_quora:=0} # use 0-th card as default
export CUDA_VISIBLE_DEVICES=$cudaid export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py
cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default cudaid=${text_matching_on_quora_m:=0,1,2,3} # use 0,1,2,3 card as default
export CUDA_VISIBLE_DEVICES=$cudaid export CUDA_VISIBLE_DEVICES=$cudaid
FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce | python _ce.py FLAGS_benchmark=true python train_and_evaluate.py --model_name=cdssmNet --config=cdssm_base --enable_ce --epoch_num=5 | python _ce.py
...@@ -7,11 +7,11 @@ from kpi import CostKpi ...@@ -7,11 +7,11 @@ from kpi import CostKpi
from kpi import DurationKpi from kpi import DurationKpi
each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.05, 0, actived=True) each_pass_duration_card1_kpi = DurationKpi('each_pass_duration_card1', 0.08, 0, actived=True)
train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.2, 0) train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.08, 0)
train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0) train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0)
each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.05, 0, actived=True) each_pass_duration_card4_kpi = DurationKpi('each_pass_duration_card4', 0.08, 0, actived=True)
train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.2, 0) train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.08, 0)
train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0) train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0)
tracking_kpis = [ tracking_kpis = [
......
...@@ -34,6 +34,7 @@ parser = argparse.ArgumentParser(description=__doc__) ...@@ -34,6 +34,7 @@ parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('--model_name', type=str, default='cdssmNet', help="Which model to train") parser.add_argument('--model_name', type=str, default='cdssmNet', help="Which model to train")
parser.add_argument('--config', type=str, default='cdssm_base', help="The global config setting") parser.add_argument('--config', type=str, default='cdssm_base', help="The global config setting")
parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.') parser.add_argument('--enable_ce', action='store_true', help='If set, run the task with continuous evaluation logs.')
parser.add_argument('--epoch_num', type=int, help='Number of epoch')
DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache/paddle/dataset') DATA_DIR = os.path.join(os.path.expanduser('~'), '.cache/paddle/dataset')
...@@ -241,6 +242,9 @@ def main(): ...@@ -241,6 +242,9 @@ def main():
args = parser.parse_args() args = parser.parse_args()
global_config = configs.__dict__[args.config]() global_config = configs.__dict__[args.config]()
if args.epoch_num != None:
global_config.epoch_num = args.epoch_num
print("net_name: ", args.model_name) print("net_name: ", args.model_name)
net = models.__dict__[args.model_name](global_config) net = models.__dict__[args.model_name](global_config)
......
...@@ -33,7 +33,7 @@ class CNNEncoder(object): ...@@ -33,7 +33,7 @@ class CNNEncoder(object):
""" cnn-encoder""" """ cnn-encoder"""
def __init__(self, def __init__(self,
param_name="cnn.w", param_name="cnn",
win_size=3, win_size=3,
ksize=128, ksize=128,
act='tanh', act='tanh',
...@@ -51,13 +51,15 @@ class CNNEncoder(object): ...@@ -51,13 +51,15 @@ class CNNEncoder(object):
filter_size=self.win_size, filter_size=self.win_size,
act=self.act, act=self.act,
pool_type=self.pool_type, pool_type=self.pool_type,
param_attr=str(self.param_name)) param_attr=self.param_name + ".param",
bias_attr=self.param_name + ".bias")
class GrnnEncoder(object): class GrnnEncoder(object):
""" grnn-encoder """ """ grnn-encoder """
def __init__(self, param_name="grnn.w", hidden_size=128): def __init__(self, param_name="grnn", hidden_size=128):
self.param_name = param_name self.param_name = param_name
self.hidden_size = hidden_size self.hidden_size = hidden_size
...@@ -65,13 +67,15 @@ class GrnnEncoder(object): ...@@ -65,13 +67,15 @@ class GrnnEncoder(object):
fc0 = nn.fc( fc0 = nn.fc(
input=emb, input=emb,
size=self.hidden_size * 3, size=self.hidden_size * 3,
param_attr=str(str(self.param_name) + "_fc") param_attr=self.param_name + "_fc.w",
) bias_attr=False)
gru_h = nn.dynamic_gru( gru_h = nn.dynamic_gru(
input=fc0, input=fc0,
size=self.hidden_size, size=self.hidden_size,
is_reverse=False, is_reverse=False,
param_attr=str(self.param_name)) param_attr=self.param_name + ".param",
bias_attr=self.param_name + ".bias")
return nn.sequence_pool(input=gru_h, pool_type='max') return nn.sequence_pool(input=gru_h, pool_type='max')
...@@ -139,17 +143,17 @@ class MultiviewSimnet(object): ...@@ -139,17 +143,17 @@ class MultiviewSimnet(object):
# lookup embedding for each slot # lookup embedding for each slot
q_embs = [ q_embs = [
nn.embedding( nn.embedding(
input=query, size=self.emb_shape, param_attr="emb.w") input=query, size=self.emb_shape, param_attr="emb")
for query in q_slots for query in q_slots
] ]
pt_embs = [ pt_embs = [
nn.embedding( nn.embedding(
input=title, size=self.emb_shape, param_attr="emb.w") input=title, size=self.emb_shape, param_attr="emb")
for title in pt_slots for title in pt_slots
] ]
nt_embs = [ nt_embs = [
nn.embedding( nn.embedding(
input=title, size=self.emb_shape, param_attr="emb.w") input=title, size=self.emb_shape, param_attr="emb")
for title in nt_slots for title in nt_slots
] ]
...@@ -170,9 +174,9 @@ class MultiviewSimnet(object): ...@@ -170,9 +174,9 @@ class MultiviewSimnet(object):
nt_concat = nn.concat(nt_encodes) nt_concat = nn.concat(nt_encodes)
# projection of hidden layer # projection of hidden layer
q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w') q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b')
pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w') pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w') nt_hid = nn.fc(nt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
# cosine of hidden layers # cosine of hidden layers
cos_pos = nn.cos_sim(q_hid, pt_hid) cos_pos = nn.cos_sim(q_hid, pt_hid)
...@@ -213,12 +217,12 @@ class MultiviewSimnet(object): ...@@ -213,12 +217,12 @@ class MultiviewSimnet(object):
# lookup embedding for each slot # lookup embedding for each slot
q_embs = [ q_embs = [
nn.embedding( nn.embedding(
input=query, size=self.emb_shape, param_attr="emb.w") input=query, size=self.emb_shape, param_attr="emb")
for query in q_slots for query in q_slots
] ]
pt_embs = [ pt_embs = [
nn.embedding( nn.embedding(
input=title, size=self.emb_shape, param_attr="emb.w") input=title, size=self.emb_shape, param_attr="emb")
for title in pt_slots for title in pt_slots
] ]
# encode each embedding field with encoder # encode each embedding field with encoder
...@@ -232,8 +236,8 @@ class MultiviewSimnet(object): ...@@ -232,8 +236,8 @@ class MultiviewSimnet(object):
q_concat = nn.concat(q_encodes) q_concat = nn.concat(q_encodes)
pt_concat = nn.concat(pt_encodes) pt_concat = nn.concat(pt_encodes)
# projection of hidden layer # projection of hidden layer
q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w') q_hid = nn.fc(q_concat, size=self.hidden_size, param_attr='q_fc.w', bias_attr='q_fc.b')
pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w') pt_hid = nn.fc(pt_concat, size=self.hidden_size, param_attr='t_fc.w', bias_attr='t_fc.b')
# cosine of hidden layers # cosine of hidden layers
cos = nn.cos_sim(q_hid, pt_hid) cos = nn.cos_sim(q_hid, pt_hid)
return cos return cos
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册