Commit 78489c4b authored by tianxin04

format

Parent 354d97a8
@@ -19,7 +19,15 @@ from __future__ import print_function

 import numpy as np

-def mask(batch_tokens, seg_labels, mask_word_tags, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
+
+def mask(batch_tokens,
+         seg_labels,
+         mask_word_tags,
+         total_token_num,
+         vocab_size,
+         CLS=1,
+         SEP=2,
+         MASK=3):
     """
     Add mask for batch_tokens, return out, mask_label, mask_pos;
     Note: mask_pos responding the batch_tokens after padded;
@@ -90,7 +98,8 @@ def mask(batch_tokens, seg_labels, mask_word_tags, total_token_num, vocab_size,
                 # random replace
                 if token != SEP and token != CLS:
                     mask_label.append(sent[token_index])
-                    sent[token_index] = replace_ids[prob_index + token_index]
+                    sent[token_index] = replace_ids[prob_index +
+                                                    token_index]
                     mask_flag = True
                     mask_pos.append(sent_index * max_len + token_index)
             else:
@@ -143,7 +152,10 @@ def prepare_batch_data(insts,
     pos_id = pad_batch_data(batch_pos_ids, pad_idx=pad_id)
     sent_id = pad_batch_data(batch_sent_ids, pad_idx=pad_id)

-    return_list = [src_id, pos_id, sent_id, self_attn_bias, mask_label, mask_pos, labels, next_sent_index]
+    return_list = [
+        src_id, pos_id, sent_id, self_attn_bias, mask_label, mask_pos, labels,
+        next_sent_index
+    ]

     return return_list

@@ -207,4 +219,5 @@ def pad_batch_data(insts,

 if __name__ == "__main__":
     pass
+
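For orientation, mask() returns the masked token stream plus the labels and flattened positions the masked-LM head needs: mask_pos is computed as sent_index * max_len + token_index against the padded batch, which is why the docstring warns that it corresponds to batch_tokens after padding. Below is a minimal, self-contained sketch of that contract; it is illustrative only (no whole-word masking via seg_labels, no random replacement, and toy_mask / rate are invented names, not the repo's API):

import numpy as np

def toy_mask(batch_tokens, max_len, CLS=1, SEP=2, MASK=3, rate=0.15):
    # Simplified MLM masking: returns masked tokens, original ids of the
    # masked positions, and flattened positions computed exactly like
    # mask_pos above (sent_index * max_len + token_index).
    rng = np.random.RandomState(0)
    out, mask_label, mask_pos = [], [], []
    for sent_index, sent in enumerate(batch_tokens):
        sent = list(sent)
        for token_index, token in enumerate(sent):
            # Never mask CLS/SEP, mirroring the token != SEP / CLS guard.
            if token in (CLS, SEP):
                continue
            if rng.rand() < rate:
                mask_label.append(token)
                sent[token_index] = MASK
                mask_pos.append(sent_index * max_len + token_index)
        out.append(sent)
    return out, mask_label, mask_pos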
@@ -25,22 +25,20 @@ import paddle.fluid as fluid

 from model.ernie import ErnieModel

-def create_model(args,
-                 pyreader_name,
-                 ernie_config,
-                 is_prediction=False):
+def create_model(args, pyreader_name, ernie_config, is_prediction=False):
     pyreader = fluid.layers.py_reader(
         capacity=50,
         shapes=[[-1, args.max_seq_len, 1], [-1, args.max_seq_len, 1],
                 [-1, args.max_seq_len, 1],
-                [-1, args.max_seq_len, args.max_seq_len], [-1, 1], [-1, 1], [-1, 1]],
+                [-1, args.max_seq_len, args.max_seq_len], [-1, 1], [-1, 1],
+                [-1, 1]],
         dtypes=['int64', 'int64', 'int64', 'float', 'int64', 'int64', 'int64'],
         lod_levels=[0, 0, 0, 0, 0, 0, 0],
         name=pyreader_name,
         use_double_buffer=True)

-    (src_ids, sent_ids, pos_ids, self_attn_mask, labels,
-     next_sent_index, qids) = fluid.layers.read_file(pyreader)
+    (src_ids, sent_ids, pos_ids, self_attn_mask, labels, next_sent_index,
+     qids) = fluid.layers.read_file(pyreader)

     ernie = ErnieModel(
         src_ids=src_ids,
@@ -57,7 +55,7 @@ def create_model(args,
         dropout_implementation="upscale_in_train")
     logits = fluid.layers.fc(
         input=cls_feats,
-        size=ernie_config["num_labels"],
+        size=args.num_labels,
         param_attr=fluid.ParamAttr(
             name="cls_out_w",
             initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
@@ -82,18 +80,21 @@ def create_model(args,
     num_seqs = fluid.layers.create_tensor(dtype='int64')
     accuracy = fluid.layers.accuracy(input=probs, label=labels, total=num_seqs)

-    graph_vars = {"loss": loss,
-                  "probs": probs,
-                  "accuracy": accuracy,
-                  "labels": labels,
-                  "num_seqs": num_seqs,
-                  "qids": qids}
+    graph_vars = {
+        "loss": loss,
+        "probs": probs,
+        "accuracy": accuracy,
+        "labels": labels,
+        "num_seqs": num_seqs,
+        "qids": qids
+    }

     for k, v in graph_vars.items():
-        v.persistable=True
+        v.persistable = True

     return pyreader, graph_vars

+
 def evaluate_mrr(preds):
     last_qid = None
     total_mrr = 0.0
@@ -114,6 +115,7 @@ def evaluate_mrr(preds):

     return total_mrr / qnum

+
 def evaluate_map(preds):
     def singe_map(st, en):
         total_p = 0.0
@@ -142,9 +144,10 @@ def evaluate_map(preds):
     total_map += singe_map(st, len(preds))
     return total_map / qnum

+
 def evaluate(exe, test_program, test_pyreader, graph_vars, eval_phase):
-    train_fetch_list = [graph_vars["loss"].name,
-                        graph_vars["accuracy"].name,
+    train_fetch_list = [
+        graph_vars["loss"].name, graph_vars["accuracy"].name,
         graph_vars["num_seqs"].name
     ]
@@ -152,7 +155,7 @@ def evaluate(exe, test_program, test_pyreader, graph_vars, eval_phase):
     if "learning_rate" in graph_vars:
         train_fetch_list.append(graph_vars["learning_rate"].name)
     outputs = exe.run(fetch_list=train_fetch_list)
-    ret = {"loss":np.mean(outputs[0]), "accuracy":np.mean(outputs[1])}
+    ret = {"loss": np.mean(outputs[0]), "accuracy": np.mean(outputs[1])}
     if "learning_rate" in graph_vars:
         ret["learning_rate"] = float(outputs[4][0])
     return ret
@@ -162,22 +165,21 @@ def evaluate(exe, test_program, test_pyreader, graph_vars, eval_phase):
     qids, labels, scores = [], [], []
     time_begin = time.time()

-    fetch_list = [graph_vars["loss"].name,
-                  graph_vars["accuracy"].name,
-                  graph_vars["probs"].name,
-                  graph_vars["labels"].name,
-                  graph_vars["num_seqs"].name,
-                  graph_vars["qids"].name]
+    fetch_list = [
+        graph_vars["loss"].name, graph_vars["accuracy"].name,
+        graph_vars["probs"].name, graph_vars["labels"].name,
+        graph_vars["num_seqs"].name, graph_vars["qids"].name
+    ]
     while True:
         try:
-            np_loss, np_acc, np_probs, np_labels, np_num_seqs, np_qids = exe.run(program=test_program,
-                                                                                 fetch_list=fetch_list)
+            np_loss, np_acc, np_probs, np_labels, np_num_seqs, np_qids = exe.run(
+                program=test_program, fetch_list=fetch_list)
             total_cost += np.sum(np_loss * np_num_seqs)
             total_acc += np.sum(np_acc * np_num_seqs)
             total_num_seqs += np.sum(np_num_seqs)
             labels.extend(np_labels.reshape((-1)).tolist())
             qids.extend(np_qids.reshape(-1).tolist())
-            scores.extend(np_probs[:,1].reshape(-1).tolist())
+            scores.extend(np_probs[:, 1].reshape(-1).tolist())
             np_preds = np.argmax(np_probs, axis=1).astype(np.float32)
             total_label_pos_num += np.sum(np_labels)
             total_pred_pos_num += np.sum(np_preds)
@@ -188,20 +190,23 @@ def evaluate(exe, test_program, test_pyreader, graph_vars, eval_phase):
     time_end = time.time()

     if len(qids) == 0:
-        print("[%s evaluation] ave loss: %f, ave acc: %f, data_num: %d, elapsed time: %f s" %
-              (eval_phase, total_cost / total_num_seqs,
-               total_acc / total_num_seqs, total_num_seqs, time_end - time_begin))
+        print(
+            "[%s evaluation] ave loss: %f, ave acc: %f, data_num: %d, elapsed time: %f s"
+            % (eval_phase, total_cost / total_num_seqs, total_acc /
+               total_num_seqs, total_num_seqs, time_end - time_begin))
     else:
         r = total_correct_num / total_label_pos_num
         p = total_correct_num / total_pred_pos_num
         f = 2 * p * r / (p + r)

         assert len(qids) == len(labels) == len(scores)
-        preds = sorted(zip(qids, scores, labels), key=lambda elem:(elem[0], -elem[1]))
+        preds = sorted(
+            zip(qids, scores, labels), key=lambda elem: (elem[0], -elem[1]))
         mrr = evaluate_mrr(preds)
         map = evaluate_map(preds)

-        print("[%s evaluation] ave loss: %f, ave_acc: %f, mrr: %f, map: %f, p: %f, r: %f, f1: %f, data_num: %d, elapsed time: %f s" %
-              (eval_phase, total_cost / total_num_seqs,
-               total_acc / total_num_seqs,
-               mrr, map, p, r, f, total_num_seqs, time_end - time_begin))
+        print(
+            "[%s evaluation] ave loss: %f, ave_acc: %f, mrr: %f, map: %f, p: %f, r: %f, f1: %f, data_num: %d, elapsed time: %f s"
+            % (eval_phase, total_cost / total_num_seqs,
+               total_acc / total_num_seqs, mrr, map, p, r, f, total_num_seqs,
+               time_end - time_begin))
@@ -74,3 +74,4 @@ run_type_g.add_arg("do_train", bool, True, "Whether to pe
 run_type_g.add_arg("do_val", bool, True, "Whether to perform evaluation on dev data set.")
 run_type_g.add_arg("do_test", bool, True, "Whether to perform evaluation on test data set.")
 run_type_g.add_arg("metrics", bool, True, "Whether to perform evaluation on test data set.")
+# yapf: enable
@@ -24,7 +24,6 @@ from utils.args import ArgumentGroup, print_arguments
 # yapf: disable
 parser = argparse.ArgumentParser(__doc__)
-parser = argparse.ArgumentParser(__doc__)

 model_g = ArgumentGroup(parser, "model", "model configuration and paths.")
 model_g.add_arg("ernie_config_path", str, "./config/ernie_config.json", "Path to the json file for ernie model config.")
 model_g.add_arg("init_checkpoint", str, None, "Init checkpoint to resume training from.")
@@ -30,6 +30,7 @@ import paddle.fluid as fluid

 from batching import prepare_batch_data

+
 class ErnieDataReader(object):
     def __init__(self,
                  filelist,
@@ -81,8 +82,8 @@ class ErnieDataReader(object):
         sent_ids = [int(token) for token in sent_ids.split(" ")]
         pos_ids = [int(token) for token in pos_ids.split(" ")]
         seg_labels = [int(seg_label) for seg_label in seg_labels.split(" ")]
-        assert len(token_ids) == len(sent_ids) == len(
-            pos_ids) == len(seg_labels
+        assert len(token_ids) == len(sent_ids) == len(pos_ids) == len(
+            seg_labels
         ), "[Must be true]len(token_ids) == len(sent_ids) == len(pos_ids) == len(seg_labels)"
         label = int(label)
         if len(token_ids) > max_seq_len:
@@ -153,13 +154,16 @@ class ErnieDataReader(object):
             if left_len <= max_len:
                 return (token_seq[1:sep_index], seg_labels[1:sep_index])
             else:
-                return [token_seq[sep_index + 1: -1], seg_labels[sep_index + 1 : -1]]
+                return [
+                    token_seq[sep_index + 1:-1], seg_labels[sep_index + 1:-1]
+                ]

         for i in range(num_sample):
             pair_index = (i + 1) % num_sample
-            left_tokens, left_seg_labels = split_sent(pos_samples[i],
-                (self.max_seq_len - 3) // 2, self.sep_id)
-            right_tokens, right_seg_labels = split_sent(pos_samples[pair_index],
+            left_tokens, left_seg_labels = split_sent(
+                pos_samples[i], (self.max_seq_len - 3) // 2, self.sep_id)
+            right_tokens, right_seg_labels = split_sent(
+                pos_samples[pair_index],
                 self.max_seq_len - 3 - len(left_tokens), self.sep_id)
             token_seq = [self.cls_id] + left_tokens + [self.sep_id] + \
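The split_sent helper above supports building mismatched sentence pairs: sample i contributes a left segment of at most (max_seq_len - 3) // 2 tokens, sample (i + 1) % num_sample fills the remaining budget, and the result is assembled as [CLS] left [SEP] right [SEP]. A minimal sketch of that length budgeting (the hard truncation here is a simplification; the real code splits at a [SEP] boundary and carries seg_labels along, and toy_make_pair is an illustrative name):

def toy_make_pair(left_ids, right_ids, max_seq_len, cls_id=1, sep_id=2):
    # Budget: [CLS] + left + [SEP] + right + [SEP] must fit in max_seq_len,
    # hence the "max_seq_len - 3" used above. Left gets at most half; right
    # gets whatever budget remains after left is cut.
    left_budget = (max_seq_len - 3) // 2
    left = left_ids[:left_budget]
    right = right_ids[:max_seq_len - 3 - len(left)]
    return [cls_id] + left + [sep_id] + right + [sep_id]

print(toy_make_pair(list(range(10, 20)), list(range(30, 40)), 16))
# 1 + 6 + 1 + 7 + 1 = 16 tokens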
@@ -148,7 +148,9 @@ class BaseReader(object):
         else:
             label_id = example.label

-        Record = namedtuple('Record', ['token_ids', 'text_type_ids', 'position_ids', 'label_id', 'qid'])
+        Record = namedtuple(
+            'Record',
+            ['token_ids', 'text_type_ids', 'position_ids', 'label_id', 'qid'])

         qid = None
         if "qid" in example._fields:
@@ -168,7 +170,8 @@ class BaseReader(object):
         for index, example in enumerate(examples):
             if phase == "train":
                 self.current_example = index
-            record = self._convert_example_to_record(example, self.max_seq_len, self.tokenizer)
+            record = self._convert_example_to_record(example, self.max_seq_len,
+                                                     self.tokenizer)
             max_len = max(max_len, len(record.token_ids))
             if self.in_tokens:
                 to_append = (len(batch_records) + 1) * max_len <= batch_size
@@ -187,7 +190,12 @@ class BaseReader(object):
         examples = self._read_tsv(input_file)
         return len(examples)

-    def data_generator(self, input_file, batch_size, epoch, shuffle=True, phase=None):
+    def data_generator(self,
+                       input_file,
+                       batch_size,
+                       epoch,
+                       shuffle=True,
+                       phase=None):
         examples = self._read_tsv(input_file)

         def wrapper():
@@ -198,8 +206,10 @@ class BaseReader(object):
                 if shuffle:
                     np.random.shuffle(examples)

-                for batch_data in self._prepare_batch_data(examples, batch_size, phase=phase):
+                for batch_data in self._prepare_batch_data(
+                        examples, batch_size, phase=phase):
                     yield batch_data

         return wrapper
@@ -209,7 +219,9 @@ class ClassifyReader(BaseReader):
         with open(input_file, "r") as f:
             reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
             headers = next(reader)
-            text_indices = [index for index, h in enumerate(headers) if h != "label"]
+            text_indices = [
+                index for index, h in enumerate(headers) if h != "label"
+            ]
             Example = namedtuple('Example', headers)

             examples = []
@@ -236,17 +248,24 @@ class ClassifyReader(BaseReader):

         # padding
         padded_token_ids, next_sent_index, self_attn_bias = pad_batch_data(
-            batch_token_ids, pad_idx=self.pad_id, return_next_sent_pos=True, return_attn_bias=True)
-        padded_text_type_ids = pad_batch_data(batch_text_type_ids, pad_idx=self.pad_id)
-        padded_position_ids = pad_batch_data(batch_position_ids, pad_idx=self.pad_id)
+            batch_token_ids,
+            pad_idx=self.pad_id,
+            return_next_sent_pos=True,
+            return_attn_bias=True)
+        padded_text_type_ids = pad_batch_data(
+            batch_text_type_ids, pad_idx=self.pad_id)
+        padded_position_ids = pad_batch_data(
+            batch_position_ids, pad_idx=self.pad_id)

-        return_list = [padded_token_ids, padded_text_type_ids, padded_position_ids, self_attn_bias, batch_labels, next_sent_index, batch_qids]
+        return_list = [
+            padded_token_ids, padded_text_type_ids, padded_position_ids,
+            self_attn_bias, batch_labels, next_sent_index, batch_qids
+        ]

         return return_list


 class SequenceLabelReader(BaseReader):
     def _pad_batch_records(self, batch_records):
         batch_token_ids = [record.token_ids for record in batch_records]
         batch_text_type_ids = [record.text_type_ids for record in batch_records]
@@ -256,13 +275,23 @@ class SequenceLabelReader(BaseReader):

         # padding
         padded_token_ids, self_attn_bias = pad_batch_data(
-            batch_token_ids, pad_idx=self.pad_id, return_next_sent_pos=False, return_attn_bias=True)
-        padded_text_type_ids = pad_batch_data(batch_text_type_ids, pad_idx=self.pad_id)
-        padded_position_ids = pad_batch_data(batch_position_ids, pad_idx=self.pad_id)
-        padded_label_ids = pad_batch_data(batch_label_ids, pad_idx=len(self.label_map)-1)
-        batch_seq_lens = np.array(batch_seq_lens).astype("int64").reshape([-1, 1])
+            batch_token_ids,
+            pad_idx=self.pad_id,
+            return_next_sent_pos=False,
+            return_attn_bias=True)
+        padded_text_type_ids = pad_batch_data(
+            batch_text_type_ids, pad_idx=self.pad_id)
+        padded_position_ids = pad_batch_data(
+            batch_position_ids, pad_idx=self.pad_id)
+        padded_label_ids = pad_batch_data(
+            batch_label_ids, pad_idx=len(self.label_map) - 1)
+        batch_seq_lens = np.array(batch_seq_lens).astype("int64").reshape(
+            [-1, 1])

-        return_list = [padded_token_ids, padded_text_type_ids, padded_position_ids, self_attn_bias, padded_label_ids, batch_seq_lens]
+        return_list = [
+            padded_token_ids, padded_text_type_ids, padded_position_ids,
+            self_attn_bias, padded_label_ids, batch_seq_lens
+        ]

         return return_list

     def _reseg_token_label(self, tokens, labels, tokenizer):
@@ -299,9 +328,13 @@ class SequenceLabelReader(BaseReader):
         position_ids = list(range(len(token_ids)))
         text_type_ids = [0] * len(token_ids)
         no_entity_id = len(self.label_map) - 1
-        label_ids = [no_entity_id] + [self.label_map[label] for label in labels] + [no_entity_id]
+        label_ids = [no_entity_id] + [
+            self.label_map[label] for label in labels
+        ] + [no_entity_id]

-        Record = namedtuple('Record', ['token_ids', 'text_type_ids', 'position_ids', 'label_ids'])
+        Record = namedtuple(
+            'Record',
+            ['token_ids', 'text_type_ids', 'position_ids', 'label_ids'])
         record = Record(
             token_ids=token_ids,
             text_type_ids=text_type_ids,
@@ -309,5 +342,6 @@ class SequenceLabelReader(BaseReader):
             label_ids=label_ids)
         return record

+
 if __name__ == '__main__':
     pass
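Both readers funnel their batches through pad_batch_data, which right-pads every sequence to the longest in the batch and can also emit an additive self-attention bias that suppresses the padded positions. A rough NumPy sketch of the idea, assuming a [batch, max_len, max_len] bias and a large negative constant (both conventions and toy_pad_batch itself are assumptions for illustration, not the exact signature in batching.py):

import numpy as np

def toy_pad_batch(insts, pad_idx=0, big_neg=-10000.0):
    # Right-pad token ids to the batch max length, then build an additive
    # attention bias: 0.0 over real tokens, a large negative value over
    # padding, so softmax attention effectively ignores padded slots.
    max_len = max(len(inst) for inst in insts)
    padded = np.array(
        [inst + [pad_idx] * (max_len - len(inst)) for inst in insts],
        dtype="int64")
    mask = np.array(
        [[0.0] * len(inst) + [big_neg] * (max_len - len(inst))
         for inst in insts])
    attn_bias = np.tile(mask[:, np.newaxis, :], (1, max_len, 1))
    return padded, attn_bias

padded, bias = toy_pad_batch([[5, 6, 7], [8, 9]])
print(padded.shape, bias.shape)  # (2, 3) (2, 3, 3)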
@@ -34,9 +34,9 @@ from utils.args import ArgumentGroup, print_arguments
 from utils.init import init_pretraining_params, init_checkpoint
 from finetune_args import parser

 args = parser.parse_args()

 def main(args):
     ernie_config = ErnieConfig(args.ernie_config_path)
     ernie_config.print_config()
@@ -49,7 +49,8 @@ def main(args):
     dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
     exe = fluid.Executor(place)

-    reader = task_reader.ClassifyReader(vocab_path=args.vocab_path,
+    reader = task_reader.ClassifyReader(
+        vocab_path=args.vocab_path,
         label_map_config=args.label_map_config,
         max_seq_len=args.max_seq_len,
         do_lower_case=args.do_lower_case,
@@ -108,7 +109,8 @@ def main(args):
         fluid.memory_optimize(
             input_program=train_program,
-            skip_opt_set=[graph_vars["loss"].name,
+            skip_opt_set=[
+                graph_vars["loss"].name,
                 graph_vars["probs"].name,
                 graph_vars["accuracy"].name,
                 graph_vars["num_seqs"].name,
@@ -201,7 +203,8 @@ def main(args):
                 if steps % args.skip_steps != 0:
                     train_exe.run(fetch_list=[])
                 else:
-                    outputs = evaluate(train_exe, train_program, train_pyreader, graph_vars, "train")
+                    outputs = evaluate(train_exe, train_program, train_pyreader,
+                                       graph_vars, "train")

                     if args.verbose:
                         verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
@@ -217,7 +220,8 @@ def main(args):
                     print("epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                           "ave acc: %f, speed: %f steps/s" %
                           (current_epoch, current_example, num_train_examples,
-                           steps, outputs["loss"], outputs["accuracy"], args.skip_steps / used_time))
+                           steps, outputs["loss"], outputs["accuracy"],
+                           args.skip_steps / used_time))
                     time_begin = time.time()

                 if steps % args.save_steps == 0:
@@ -254,7 +258,9 @@ def main(args):
     if args.do_val:
         test_pyreader.decorate_tensor_provider(
             reader.data_generator(
-                args.dev_set, batch_size=args.batch_size, epoch=1,
+                args.dev_set,
+                batch_size=args.batch_size,
+                epoch=1,
                 shuffle=False))
         print("Final validation result:")
         evaluate(exe, test_prog, test_pyreader, graph_vars, "dev")
@@ -273,4 +279,5 @@ def main(args):

+
 if __name__ == '__main__':
     print_arguments(args)
     main(args)
@@ -33,9 +33,9 @@ from utils.args import print_arguments
 from finetune.sequence_label import create_model, evaluate
 from finetune_args import parser

 args = parser.parse_args()

 def main(args):
     ernie_config = ErnieConfig(args.ernie_config_path)
     ernie_config.print_config()
@@ -48,7 +48,8 @@ def main(args):
     dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
     exe = fluid.Executor(place)

-    reader = task_reader.SequenceLabelReader(vocab_path=args.vocab_path,
+    reader = task_reader.SequenceLabelReader(
+        vocab_path=args.vocab_path,
         label_map_config=args.label_map_config,
         max_seq_len=args.max_seq_len,
         do_lower_case=args.do_lower_case,
@@ -107,10 +108,9 @@ def main(args):
         fluid.memory_optimize(
             input_program=train_program,
-            skip_opt_set=[graph_vars["loss"].name,
-                          graph_vars["labels"].name,
-                          graph_vars["infers"].name,
-                          graph_vars["seq_lens"].name
+            skip_opt_set=[
+                graph_vars["loss"].name, graph_vars["labels"].name,
+                graph_vars["infers"].name, graph_vars["seq_lens"].name
             ])

     if args.verbose:
@@ -200,21 +200,23 @@ def main(args):
                 if steps % args.skip_steps != 0:
                     train_exe.run(fetch_list=[])
                 else:
-                    outputs = evaluate(train_exe, train_program, train_pyreader, graph_vars, args.num_labels, "train", dev_count)
+                    outputs = evaluate(train_exe, train_program, train_pyreader,
+                                       graph_vars, args.num_labels, "train",
+                                       dev_count)
                     if args.verbose:
                         verbose = "train pyreader queue size: %d, " % train_pyreader.queue.size(
                         )
                         verbose += "learning rate: %f" % (
-                            outputs["lr"] if warmup_steps > 0 else args.learning_rate)
+                            outputs["lr"]
+                            if warmup_steps > 0 else args.learning_rate)
                         print(verbose)

-                    current_example, current_epoch = reader.get_train_progress(
-                    )
+                    current_example, current_epoch = reader.get_train_progress()
                     time_end = time.time()
                     used_time = time_end - time_begin
                     print("epoch: %d, progress: %d/%d, step: %d, loss: %f, "
-                          "f1: %f, precision: %f, recall: %f, speed: %f steps/s" %
-                          (current_epoch, current_example, num_train_examples,
+                          "f1: %f, precision: %f, recall: %f, speed: %f steps/s"
+                          % (current_epoch, current_example, num_train_examples,
                            steps, outputs["loss"], outputs["f1"],
                            outputs["precision"], outputs["recall"],
                            args.skip_steps / used_time))
@@ -234,7 +236,8 @@ def main(args):
                             batch_size=args.batch_size,
                             epoch=1,
                             shuffle=False))
-                    evaluate(exe, test_prog, test_pyreader, graph_vars, args.num_labels, "dev")
+                    evaluate(exe, test_prog, test_pyreader, graph_vars,
+                             args.num_labels, "dev")

                 # evaluate test set
                 if args.do_test:
                     test_pyreader.decorate_tensor_provider(
@@ -243,7 +246,8 @@ def main(args):
                             batch_size=args.batch_size,
                             epoch=1,
                             shuffle=False))
-                    evaluate(exe, test_prog, test_pyreader, graph_vars, args.num_labels, "test")
+                    evaluate(exe, test_prog, test_pyreader, graph_vars,
+                             args.num_labels, "test")

         except fluid.core.EOFException:
             save_path = os.path.join(args.checkpoints, "step_" + str(steps))
@@ -255,7 +259,9 @@ def main(args):
     if args.do_val:
         test_pyreader.decorate_tensor_provider(
             reader.data_generator(
-                args.dev_set, batch_size=args.batch_size, epoch=1,
+                args.dev_set,
+                batch_size=args.batch_size,
+                epoch=1,
                 shuffle=False))
         print("Final validation result:")
         evaluate(exe, test_prog, test_pyreader, graph_vars, args.num_labels, "dev")
@@ -35,8 +35,10 @@ from utils.init import init_checkpoint, init_pretraining_params
 from pretrain_args import parser

 args = parser.parse_args()
 # yapf: enable.

+
+
 def create_model(pyreader_name, ernie_config):
     pyreader = fluid.layers.py_reader(
         capacity=70,
@@ -224,8 +226,7 @@ def train(args):
             print("train_id == 0, sleep 60s")
             time.sleep(60)
         print("worker_endpoints:{} trainers_num:{} current_endpoint:{} \
-              trainer_id:{}"
-              .format(worker_endpoints, trainers_num,
+              trainer_id:{}".format(worker_endpoints, trainers_num,
               current_endpoint, trainer_id))

     # prepare nccl2 env.
@@ -319,7 +320,8 @@ def train(args):
                 epoch, current_file_index, total_file, current_file, mask_type = data_reader.get_progress(
                 )
                 print("current learning_rate:%f" % np_lr[0])
-                print("epoch: %d, progress: %d/%d, step: %d, loss: %f, "
+                print(
+                    "epoch: %d, progress: %d/%d, step: %d, loss: %f, "
                     "ppl: %f, next_sent_acc: %f, speed: %f steps/s, file: %s, mask_type: %s"
                     % (epoch, current_file_index, total_file, steps,
                        np.mean(np.array(cost)),
@@ -341,8 +343,7 @@ def train(args):
                     print("[validation_set] epoch: %d, step: %d, "
                           "loss: %f, global ppl: %f, batch-averged ppl: %f, "
                           "next_sent_acc: %f, speed: %f steps/s" %
-                          (epoch, steps,
-                           np.mean(np.array(vali_cost) / vali_steps),
+                          (epoch, steps, np.mean(np.array(vali_cost) / vali_steps),
                            np.exp(np.mean(np.array(vali_lm_cost) / vali_steps)),
                            np.mean(np.exp(np.array(vali_lm_cost) / vali_steps)),
                            np.mean(np.array(vali_acc) / vali_steps), vali_speed))
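A note on the two perplexities in the validation log above: "global ppl" is exp of the mean LM cost, while "batch-averged ppl" is the mean of the per-batch exp(cost); by Jensen's inequality the latter is always at least the former, and the gap grows with the variance of the costs. A tiny numeric illustration (the cost values are made up):

import numpy as np

vali_lm_cost = np.array([2.0, 4.0])  # hypothetical per-step LM costs

global_ppl = np.exp(np.mean(vali_lm_cost))     # exp(3.0)        ~= 20.09
batch_avg_ppl = np.mean(np.exp(vali_lm_cost))  # (e^2 + e^4) / 2 ~= 31.00
print(global_ppl, batch_avg_ppl)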