未验证 提交 9ae8d17e 编写于 作者: X Xiaoyao Xi 提交者: GitHub

Merge pull request #50 from wangxiao1021/api

add multi-task example
...@@ -32,7 +32,7 @@ label text_a ...@@ -32,7 +32,7 @@ label text_a
### Step 2: Train & Predict ### Step 2: Train & Predict
The code used to perform classification task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run: The code used to perform this task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run:
```shell ```shell
python run.py python run.py
......
...@@ -64,7 +64,7 @@ if __name__ == '__main__': ...@@ -64,7 +64,7 @@ if __name__ == '__main__':
# step 8-1*: load pretrained parameters # step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params) trainer.load_pretrain(pre_params)
# step 8-2*: set saver to save model # step 8-2*: set saver to save model
# save_steps = n_steps // gpu_dev_count - batch_size # save_steps = n_steps
save_steps = 2396 save_steps = 2396
trainer.set_saver(save_steps=save_steps, save_path=save_path, save_type=save_type) trainer.set_saver(save_steps=save_steps, save_path=save_path, save_type=save_type)
# step 8-3: start training # step 8-3: start training
......
...@@ -21,7 +21,7 @@ python download.py ...@@ -21,7 +21,7 @@ python download.py
After the dataset is downloaded, you should convert the data format for training: After the dataset is downloaded, you should convert the data format for training:
```shell ```shell
python process.py quora_duplicate_questions.tsv train.tsv test.tsv python process.py data/quora_duplicate_questions.tsv data/train.tsv data/test.tsv
``` ```
If everything goes well, there will be a folder named `data/` created with all the converted data in it. If everything goes well, there will be a folder named `data/` created with all the converted data in it.
...@@ -40,7 +40,7 @@ What are the differences between the Dell Inspiron 3000, 5000, and 7000 series l ...@@ -40,7 +40,7 @@ What are the differences between the Dell Inspiron 3000, 5000, and 7000 series l
### Step 2: Train & Predict ### Step 2: Train & Predict
The code used to perform classification task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run: The code used to perform this task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run:
```shell ```shell
python run.py python run.py
......
...@@ -67,7 +67,7 @@ if __name__ == '__main__': ...@@ -67,7 +67,7 @@ if __name__ == '__main__':
# step 8-1*: load pretrained parameters # step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params, False) trainer.load_pretrain(pre_params, False)
# step 8-2*: set saver to save model # step 8-2*: set saver to save model
# save_steps = (n_steps-16) // gpu_dev_count # save_steps = n_steps-16
save_steps = 6244 save_steps = 6244
trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type) trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type)
# step 8-3: start training # step 8-3: start training
......
...@@ -39,12 +39,13 @@ Here is some example datas: ...@@ -39,12 +39,13 @@ Here is some example datas:
} }
] ]
} }
}
``` ```
### Step 2: Train & Predict ### Step 2: Train & Predict
The code used to perform classification task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run: The code used to perform this task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run:
```shell ```shell
python run.py python run.py
......
...@@ -64,7 +64,7 @@ if __name__ == '__main__': ...@@ -64,7 +64,7 @@ if __name__ == '__main__':
# step 8-1*: load pretrained parameters # step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params) trainer.load_pretrain(pre_params)
# step 8-2*: set saver to save model # step 8-2*: set saver to save model
# save_steps = (n_steps-8) // gpu_dev_count // 4 # save_steps = (n_steps-8) // 4
save_steps = 1520 save_steps = 1520
trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type) trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type)
# step 8-3: start training # step 8-3: start training
......
# -*- coding: utf-8 -*-
import os
import requests
import tarfile
import shutil
from tqdm import tqdm
def download(src, url):
    """Download ``url`` to the local path ``src`` with a progress bar.

    :param src: destination file path
    :param url: remote URL to fetch
    :return: remote file size in bytes, taken from the Content-Length header
    """
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '70.0.3538.67 Safari/537.36'
    }
    # Send the browser-like UA on the HEAD request too, so servers that
    # reject unknown agents still report the size.
    file_size = int(requests.head(url, headers=header).headers['Content-Length'])
    pbar = tqdm(total=file_size)
    resp = requests.get(url, headers=header, stream=True)
    # 'wb' (not 'ab'): a re-run must overwrite any partial file instead of
    # appending to it and corrupting the archive.
    with open(src, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                # Advance by the actual chunk length; the final chunk is
                # usually shorter than 1024 bytes.
                pbar.update(len(chunk))
    pbar.close()
    return file_size
# Resolve all paths relative to this script so it works from any CWD.
abs_path = os.path.abspath(__file__)
download_url = "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz"
target_dir = os.path.dirname(abs_path)
# fixed typo: was "downlaod_path"
download_path = os.path.join(target_dir, "dmtk_data_1.0.0.tar.gz")

download(download_path, download_url)

# Unpack the archive (context manager closes the tar handle), then delete it.
with tarfile.open(download_path) as tar:
    tar.extractall(target_dir)
os.remove(download_path)

# Only the ATIS subset is used by this example; drop the other corpora.
# ignore_errors=True keeps a re-run from crashing on already-removed dirs.
for unused in ('dstc2', 'mrda', 'multi-woz', 'swda', 'udc'):
    shutil.rmtree(os.path.join(target_dir, 'data', unused), ignore_errors=True)
# -*- coding: utf-8 -*-
import json
def load_label_map(map_dir="./data/atis/atis_slot/label_map.json"):
    """Load the slot-label map (label name -> integer id) from a JSON file.

    :param map_dir: path to the JSON file indicating chunk/slot label ids
    :return: dict parsed from the JSON file
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original json.load(open(...)) leaked the handle).
    with open(map_dir, "r") as f:
        return json.load(f)
def cal_chunk(total_res, total_label, ignore_index=6):
    """Compute token-level precision/recall/F1 of predicted vs. gold labels.

    Tokens whose id equals ``ignore_index`` are excluded from both the gold
    and the predicted counts.

    :param total_res: list of predicted label-id sequences
    :param total_label: list of gold label-id sequences, same lengths
    :param ignore_index: label id to skip (default 6 — presumably the "O"
        tag id in label_map.json; confirm against the data)
    :return: (precision, recall, f1) as floats
    """
    assert len(total_label) == len(total_res), 'prediction result doesn\'t match to labels'
    num_labels = 0   # gold tokens that carry a real (non-ignored) label
    num_corr = 0     # correctly predicted non-ignored tokens
    num_infers = 0   # predicted tokens that carry a real (non-ignored) label
    for res, label in zip(total_res, total_label):
        assert len(res) == len(label), "prediction result doesn\'t match to labels"
        num_labels += sum(1 for i in label if i != ignore_index)
        num_corr += sum(1 for l, r in zip(label, res) if l == r and l != ignore_index)
        num_infers += sum(1 for i in res if i != ignore_index)
    precision = num_corr * 1.0 / num_infers if num_infers > 0 else 0.0
    recall = num_corr * 1.0 / num_labels if num_labels > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, f1
def res_evaluate(res_dir="./outputs/predict/predictions.json", data_dir="./data/atis/atis_slot/test.tsv"):
    """Evaluate slot-filling predictions against the gold test set and print P/R/F1.

    :param res_dir: prediction file, one JSON array of label ids per line
    :param data_dir: gold tsv file with a header row and text_a<TAB>label columns
    """
    label_map = load_label_map()
    total_label = []
    # Collect gold label sequences: skip the tsv header, take the second
    # column and split on '\x02' (the field separator written by process.py).
    with open(data_dir, "r") as file:
        first_flag = True
        for line in file:
            if first_flag:
                first_flag = False
                continue
            line = line.strip("\n")
            if len(line) == 0:
                continue
            line = line.split("\t")
            if len(line) < 2:
                continue
            # [:-1] drops the trailing '\x02' left after the last field
            labels = line[1][:-1].split("\x02")
            total_label.append(labels)
    # Map label names to integer ids so they compare against predictions.
    total_label = [[label_map[j] for j in i] for i in total_label]
    total_res = []
    # Read predictions; `cnt` indexes the gold sequence aligned to this line.
    with open(res_dir, "r") as file:
        cnt = 0
        for line in file:
            line = line.strip("\n")
            if len(line) == 0:
                continue
            try:
                res_arr = json.loads(line)
                # Lengths can differ — presumably the prediction includes
                # special tokens (confirm) — so align by truncation:
                if len(total_label[cnt]) < len(res_arr):
                    # prediction longer: drop its first token and truncate
                    total_res.append(res_arr[1: 1 + len(total_label[cnt])])
                elif len(total_label[cnt]) == len(res_arr):
                    total_res.append(res_arr)
                else:
                    # gold longer: keep prediction, truncate the gold labels
                    total_res.append(res_arr)
                    total_label[cnt] = total_label[cnt][: len(res_arr)]
            except:
                # NOTE(review): bare except — any malformed line is merely
                # reported and skipped, which can silently misalign counts.
                print("json format error: {}".format(cnt))
                print(line)
            cnt += 1
    precision, recall, f1 = cal_chunk(total_res, total_label)
    print("precision: {}, recall: {}, f1: {}".format(precision, recall, f1))
if __name__ == '__main__':
    # Run the evaluation only when executed as a script, so importing this
    # module (e.g. to reuse cal_chunk) does not trigger file I/O.
    res_evaluate()
# coding=utf-8
import paddlepalm as palm
import json
from paddlepalm.distribute import gpu_dev_count
if __name__ == '__main__':
    # configs
    max_seqlen = 256
    batch_size = 16
    num_epochs = 6        # unused in this prediction-only script
    print_steps = 5
    lr = 5e-5             # unused in this prediction-only script
    num_classes = 130     # slot label count; presumably matches label_map.json — confirm
    random_seed = 1
    label_map = './data/atis/atis_slot/label_map.json'
    vocab_path = './pretrain/ernie-en-base/vocab.txt'
    predict_file = './data/atis/atis_slot/test.tsv'
    save_path = './outputs/'
    pred_output = './outputs/predict/'
    save_type = 'ckpt'
    pre_params = './pretrain/ernie-en-base/params'
    # NOTE(review): open() here is never closed explicitly
    config = json.load(open('./pretrain/ernie-en-base/ernie_config.json'))
    input_dim = config['hidden_size']

    # ----------------------- for prediction -----------------------
    # step 1-1: create a reader for prediction
    print('prepare to predict...')
    predict_seq_label_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, phase='predict')
    # step 1-2: load the prediction data
    predict_seq_label_reader.load_data(predict_file, batch_size)
    # step 2: create a backbone of the model to extract text features
    pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict')
    # step 3: register the backbone in the reader
    predict_seq_label_reader.register_with(pred_ernie)
    # step 4: create the task output head
    seq_label_pred_head = palm.head.SequenceLabel(num_classes, input_dim, phase='predict')
    # step 5-1: create a task trainer
    trainer_seq_label = palm.Trainer("slot")
    # step 5-2: build forward graph with backbone and task head
    trainer_seq_label.build_predict_forward(pred_ernie, seq_label_pred_head)
    # step 6: load the trained model
    # NOTE(review): hard-coded checkpoint path — update to your own checkpoint
    pred_model_path = './outputs/1580822697.73-ckpt.step9282'
    pred_ckpt = trainer_seq_label.load_ckpt(pred_model_path)
    # step 7: fit prepared reader and data
    trainer_seq_label.fit_reader(predict_seq_label_reader, phase='predict')
    # step 8: predict
    print('predicting..')
    trainer_seq_label.predict(print_steps=print_steps, output_dir=pred_output)
\ No newline at end of file
import os
import json
# Paths for the ATIS slot-filling data: *_old are the raw downloaded files,
# *_new are the converted files this script produces.
label_new = "data/atis/atis_slot/label_map.json"
label_old = "data/atis/atis_slot/map_tag_slot_id.txt"
train_old = "data/atis/atis_slot/train.txt"
train_new = "data/atis/atis_slot/train.tsv"
dev_old = "data/atis/atis_slot/dev.txt"
dev_new = "data/atis/atis_slot/dev.tsv"
test_old = "data/atis/atis_slot/test.txt"
test_new = "data/atis/atis_slot/test.tsv"


def _prepend_header(path, header="label\ttext_a\n"):
    # Insert the tsv header line at the top of an existing file.
    # (No explicit close needed: the with-block closes the handle.)
    with open(path, 'r+') as f:
        content = f.read()
        f.seek(0, 0)
        f.write(header + content)


# The intent-classification files only need renaming plus a header line.
intent_test = "data/atis/atis_intent/test.tsv"
os.rename("data/atis/atis_intent/test.txt", intent_test)
intent_train = "data/atis/atis_intent/train.tsv"
os.rename("data/atis/atis_intent/train.txt", intent_train)
intent_dev = "data/atis/atis_intent/dev.tsv"
os.rename("data/atis/atis_intent/dev.txt", intent_dev)
for _path in (intent_dev, intent_test, intent_train):
    _prepend_header(_path)
# NOTE(review): the original called os.mknod() to pre-create the output
# files; open(..., "w") already creates them, and mknod fails on an existing
# file (and on non-Linux platforms), so those calls were dropped.

# Build label_map.json (tag name -> integer id) from the raw two-column
# mapping file, plus the inverse map (id string -> tag name) used below.
tag_to_id = {}
id_to_tag = {}
with open(label_old, "r") as src, open(label_new, "w") as dst:
    for raw in src:
        parts = raw.split('\t')
        tag_name = parts[0]
        # rstrip('\n') instead of [:-1]: the original chopped the last digit
        # when the final line had no trailing newline.
        tag_id = parts[1].rstrip('\n')
        tag_to_id[tag_name] = int(tag_id)
        id_to_tag[tag_id] = tag_name
    dst.write(json.dumps(tag_to_id))


def _convert(src_path, dst_path):
    # Convert one raw file ("tokens<TAB>ids", fields space-separated) into
    # the tsv format: text_a and label columns, fields joined by '\x02'.
    with open(src_path, "r") as src, open(dst_path, "w") as dst:
        dst.write("text_a\tlabel\n")
        for raw in src:
            cols = raw.split('\t')
            tokens = cols[0].split(' ')
            ids = cols[1].split(' ')
            for tok in tokens:
                dst.write(tok)
                dst.write('\2')
            dst.write('\t')
            for tid in ids:
                dst.write(id_to_tag[tid.rstrip('\n')])
                dst.write('\2')
            dst.write('\n')


_convert(train_old, train_new)
_convert(test_old, test_new)
_convert(dev_old, dev_new)

# Remove the raw files once conversion succeeded.
os.remove(label_old)
os.remove(train_old)
os.remove(test_old)
os.remove(dev_old)
\ No newline at end of file
# coding=utf-8
import paddlepalm as palm
import json
from paddlepalm.distribute import gpu_dev_count
if __name__ == '__main__':
    # configs
    max_seqlen = 128
    batch_size = 16
    num_epochs = 20
    print_steps = 5
    lr = 2e-5
    num_classes = 130          # slot label count — presumably matches label_map.json; confirm
    weight_decay = 0.01
    num_classes_intent = 26    # intent class count
    dropout_prob = 0.1
    random_seed = 0
    label_map = './data/atis/atis_slot/label_map.json'
    vocab_path = './pretrain/ernie-en-base/vocab.txt'
    train_slot = './data/atis/atis_slot/train.tsv'
    train_intent = './data/atis/atis_intent/train.tsv'
    predict_file = './data/atis/atis_slot/test.tsv'
    save_path = './outputs/'
    pred_output = './outputs/predict/'
    save_type = 'ckpt'
    pre_params = './pretrain/ernie-en-base/params'
    # NOTE(review): open() here is never closed explicitly
    config = json.load(open('./pretrain/ernie-en-base/ernie_config.json'))
    input_dim = config['hidden_size']

    # ----------------------- for training -----------------------
    # step 1-1: create one reader per task
    seq_label_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, seed=random_seed)
    match_reader = palm.reader.MatchReader(vocab_path, max_seqlen, seed=random_seed)
    # step 1-2: load the training data (num_epochs=None: epochs are driven
    # by fit_readers_with_mixratio below)
    seq_label_reader.load_data(train_slot, file_format='tsv', num_epochs=None, batch_size=batch_size)
    match_reader.load_data(train_intent, file_format='tsv', num_epochs=None, batch_size=batch_size)
    # step 2: create a shared backbone to extract text features
    ernie = palm.backbone.ERNIE.from_config(config)
    # step 3: register the backbone in both readers
    seq_label_reader.register_with(ernie)
    match_reader.register_with(ernie)
    # step 4: create one task output head per task
    seq_label_head = palm.head.SequenceLabel(num_classes, input_dim, dropout_prob)
    match_head = palm.head.Match(num_classes_intent, input_dim, dropout_prob)
    # step 5-1: create per-task trainers and a multi-head trainer over them
    # (mix_ratio controls how often each task is sampled during training)
    trainer_seq_label = palm.Trainer("slot", mix_ratio=1.0)
    trainer_match = palm.Trainer("intent", mix_ratio=0.5)
    trainer = palm.MultiHeadTrainer([trainer_seq_label, trainer_match])
    # # step 5-2: build forward graph with backbone and task head
    # (per-task losses are built first; the returned vars are not used
    # directly — the multi-head trainer combines them)
    loss_var1 = trainer_match.build_forward(ernie, match_head)
    loss_var2 = trainer_seq_label.build_forward(ernie, seq_label_head)
    loss_var = trainer.build_forward()
    # step 6-1*: use warmup
    # 1.5 accounts for the total mix_ratio (1.0 + 0.5) across both tasks
    n_steps = seq_label_reader.num_examples * 1.5 * num_epochs // batch_size
    warmup_steps = int(0.1 * n_steps)
    sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
    # step 6-2: create a optimizer
    adam = palm.optimizer.Adam(loss_var, lr, sched)
    # step 6-3: build backward
    trainer.build_backward(optimizer=adam, weight_decay=weight_decay)
    # step 7: fit prepared readers and data ("slot" is the reference task
    # whose epoch count drives termination)
    trainer.fit_readers_with_mixratio([seq_label_reader, match_reader], "slot", num_epochs)
    # step 8-1*: load pretrained parameters
    trainer.load_pretrain(pre_params)
    # step 8-2*: set saver to save model
    # save_steps = int(n_steps-batch_size)
    # NOTE(review): 10 looks like a leftover debugging value — this saves a
    # checkpoint every 10 steps; consider restoring the commented formula.
    save_steps = 10
    trainer_seq_label.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type, is_multi=True)
    # step 8-3: start training
    trainer.train(print_steps=print_steps)
\ No newline at end of file
...@@ -34,7 +34,7 @@ text_a label ...@@ -34,7 +34,7 @@ text_a label
### Step 2: Train & Predict ### Step 2: Train & Predict
The code used to perform classification task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run: The code used to perform this task is in `run.py`. If you have prepared the pre-training model and the data set required for the task, run:
```shell ```shell
python run.py python run.py
......
...@@ -32,26 +32,26 @@ if __name__ == '__main__': ...@@ -32,26 +32,26 @@ if __name__ == '__main__':
# ----------------------- for training ----------------------- # ----------------------- for training -----------------------
# step 1-1: create readers for training # step 1-1: create readers for training
ner_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, seed=random_seed) seq_label_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, seed=random_seed)
# step 1-2: load the training data # step 1-2: load the training data
ner_reader.load_data(train_file, file_format='tsv', num_epochs=num_epochs, batch_size=batch_size) seq_label_reader.load_data(train_file, file_format='tsv', num_epochs=num_epochs, batch_size=batch_size)
# step 2: create a backbone of the model to extract text features # step 2: create a backbone of the model to extract text features
ernie = palm.backbone.ERNIE.from_config(config) ernie = palm.backbone.ERNIE.from_config(config)
# step 3: register the backbone in reader # step 3: register the backbone in reader
ner_reader.register_with(ernie) seq_label_reader.register_with(ernie)
# step 4: create the task output head # step 4: create the task output head
ner_head = palm.head.SequenceLabel(num_classes, input_dim, dropout_prob) seq_label_head = palm.head.SequenceLabel(num_classes, input_dim, dropout_prob)
# step 5-1: create a task trainer # step 5-1: create a task trainer
trainer = palm.Trainer(task_name) trainer = palm.Trainer(task_name)
# step 5-2: build forward graph with backbone and task head # step 5-2: build forward graph with backbone and task head
loss_var = trainer.build_forward(ernie, ner_head) loss_var = trainer.build_forward(ernie, seq_label_head)
# step 6-1*: use warmup # step 6-1*: use warmup
n_steps = ner_reader.num_examples * num_epochs // batch_size n_steps = seq_label_reader.num_examples * num_epochs // batch_size
warmup_steps = int(0.1 * n_steps) warmup_steps = int(0.1 * n_steps)
print('total_steps: {}'.format(n_steps)) print('total_steps: {}'.format(n_steps))
print('warmup_steps: {}'.format(warmup_steps)) print('warmup_steps: {}'.format(warmup_steps))
...@@ -62,43 +62,43 @@ if __name__ == '__main__': ...@@ -62,43 +62,43 @@ if __name__ == '__main__':
trainer.build_backward(optimizer=adam, weight_decay=weight_decay) trainer.build_backward(optimizer=adam, weight_decay=weight_decay)
# step 7: fit prepared reader and data # step 7: fit prepared reader and data
trainer.fit_reader(ner_reader) trainer.fit_reader(seq_label_reader)
# step 8-1*: load pretrained parameters # # step 8-1*: load pretrained parameters
trainer.load_pretrain(pre_params) # trainer.load_pretrain(pre_params)
# step 8-2*: set saver to save model # # step 8-2*: set saver to save model
save_steps = (n_steps-20)// gpu_dev_count save_steps = 1951
print('save_steps: {}'.format(save_steps)) # print('save_steps: {}'.format(save_steps))
trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type) # trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type)
# step 8-3: start training # # step 8-3: start training
trainer.train(print_steps=train_print_steps) # trainer.train(print_steps=train_print_steps)
# ----------------------- for prediction ----------------------- # ----------------------- for prediction -----------------------
# step 1-1: create readers for prediction # step 1-1: create readers for prediction
print('prepare to predict...') print('prepare to predict...')
predict_ner_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, phase='predict') predict_seq_label_reader = palm.reader.SequenceLabelReader(vocab_path, max_seqlen, label_map, phase='predict')
# step 1-2: load the training data # step 1-2: load the training data
predict_ner_reader.load_data(predict_file, batch_size) predict_seq_label_reader.load_data(predict_file, batch_size)
# step 2: create a backbone of the model to extract text features # step 2: create a backbone of the model to extract text features
pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict') pred_ernie = palm.backbone.ERNIE.from_config(config, phase='predict')
# step 3: register the backbone in reader # step 3: register the backbone in reader
predict_ner_reader.register_with(pred_ernie) predict_seq_label_reader.register_with(pred_ernie)
# step 4: create the task output head # step 4: create the task output head
ner_pred_head = palm.head.SequenceLabel(num_classes, input_dim, phase='predict') seq_label_pred_head = palm.head.SequenceLabel(num_classes, input_dim, phase='predict')
# step 5: build forward graph with backbone and task head # step 5: build forward graph with backbone and task head
trainer.build_predict_forward(pred_ernie, ner_pred_head) trainer.build_predict_forward(pred_ernie, seq_label_pred_head)
# step 6: load pretrained model # step 6: load pretrained model
pred_model_path = './outputs/ckpt.step' + str(save_steps) pred_model_path = './outputs/ckpt.step' + str(save_steps)
pred_ckpt = trainer.load_ckpt(pred_model_path) pred_ckpt = trainer.load_ckpt(pred_model_path)
# step 7: fit prepared reader and data # step 7: fit prepared reader and data
trainer.fit_reader(predict_ner_reader, phase='predict') trainer.fit_reader(predict_seq_label_reader, phase='predict')
# step 8: predict # step 8: predict
print('predicting..') print('predicting..')
......
...@@ -57,9 +57,9 @@ def yield_pieces(data, distribute_strategy, batch_size): ...@@ -57,9 +57,9 @@ def yield_pieces(data, distribute_strategy, batch_size):
yield temp yield temp
def data_feeder(reader, postprocess_fn=None, prefetch_steps=2): def data_feeder(reader, postprocess_fn=None, prefetch_steps=2, phase='train', is_multi=False):
if postprocess_fn is None: if postprocess_fn is None:
def postprocess_fn(batch): def postprocess_fn(batch, id=-1, phase='train', is_multi=False):
return batch return batch
def worker(reader, dev_count, queue): def worker(reader, dev_count, queue):
...@@ -90,6 +90,10 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2): ...@@ -90,6 +90,10 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
queue.task_done() queue.task_done()
if ret is not None: if ret is not None:
batches, num_pad = ret batches, num_pad = ret
if dev_count > 1 and phase == 'train' and is_multi:
id = batches[0]['__task_id'][0]
else:
id = -1
batch_buf = [] batch_buf = []
flag_buf = [] flag_buf = []
for idx, batch in enumerate(batches): for idx, batch in enumerate(batches):
...@@ -97,8 +101,8 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2): ...@@ -97,8 +101,8 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
flag = idx-len(batches) < -num_pad flag = idx-len(batches) < -num_pad
# if num_pad > 0: # if num_pad > 0:
# num_pad -= 1 # num_pad -= 1
# batch = postprocess_fn(batch, id) batch = postprocess_fn(batch, id, phase, is_multi=is_multi)
batch = postprocess_fn(batch) # batch = postprocess_fn(batch)
batch_buf.append(batch) batch_buf.append(batch)
flag_buf.append(flag) flag_buf.append(flag)
yield batch_buf, flag_buf yield batch_buf, flag_buf
......
...@@ -24,24 +24,21 @@ class MaskLM(Head): ...@@ -24,24 +24,21 @@ class MaskLM(Head):
''' '''
mlm mlm
''' '''
def __init__(self, input_dim, vocab_size, hidden_act, initializer_range, dropout_prob=0.0, \ def __init__(self, input_dim, vocab_size, hidden_act, dropout_prob=0.0, \
param_initializer_range=0.02, phase='train'): param_initializer_range=0.02, phase='train'):
self._is_training = phase == 'train' self._is_training = phase == 'train'
self._emb_size = input_dim self._emb_size = input_dim
self._hidden_size = input_dim self._hidden_size = input_dim
self._dropout_prob = dropout_prob if phase == 'train' else 0.0 self._dropout_prob = dropout_prob if phase == 'train' else 0.0
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=param_initializer_range)
self._preds = [] self._preds = []
self._vocab_size = vocab_size self._vocab_size = vocab_size
self._hidden_act = hidden_act self._hidden_act = hidden_act
self._initializer_range = initializer_range self._initializer_range = param_initializer_range
@property @property
def inputs_attrs(self): def inputs_attrs(self):
reader = { reader = {
"token_ids":[[-1, -1], 'int64'],
"mask_label": [[-1], 'int64'], "mask_label": [[-1], 'int64'],
"mask_pos": [[-1], 'int64'], "mask_pos": [[-1], 'int64'],
} }
...@@ -61,21 +58,19 @@ class MaskLM(Head): ...@@ -61,21 +58,19 @@ class MaskLM(Head):
def build(self, inputs, scope_name=""): def build(self, inputs, scope_name=""):
mask_pos = inputs["reader"]["mask_pos"] mask_pos = inputs["reader"]["mask_pos"]
word_emb = inputs["backbone"]["embedding_table"]
enc_out = inputs["backbone"]["encoder_outputs"]
if self._is_training: if self._is_training:
mask_label = inputs["reader"]["mask_label"] mask_label = inputs["reader"]["mask_label"]
l1 = fluid.layers.shape(inputs["reader"]["token_ids"] )[0] l1 = enc_out.shape[0]
# bxs = inputs["reader"]["token_ids"].shape[2].value l2 = enc_out.shape[1]
l2 = fluid.layers.shape(inputs["reader"]["token_ids"][0])[0] bxs = fluid.layers.fill_constant(shape=[1], value=l1*l2, dtype='int64')
bxs = (l1*l2).astype(np.int64)
# max_position = inputs["reader"]["batchsize_x_seqlen"] - 1
max_position = bxs - 1 max_position = bxs - 1
mask_pos = fluid.layers.elementwise_min(mask_pos, max_position) mask_pos = fluid.layers.elementwise_min(mask_pos, max_position)
mask_pos.stop_gradient = True mask_pos.stop_gradient = True
word_emb = inputs["backbone"]["embedding_table"]
enc_out = inputs["backbone"]["encoder_outputs"]
emb_size = word_emb.shape[-1] emb_size = word_emb.shape[-1]
_param_initializer = fluid.initializer.TruncatedNormal( _param_initializer = fluid.initializer.TruncatedNormal(
...@@ -95,7 +90,7 @@ class MaskLM(Head): ...@@ -95,7 +90,7 @@ class MaskLM(Head):
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name=scope_name+'mask_lm_trans_fc.w_0', name=scope_name+'mask_lm_trans_fc.w_0',
initializer=_param_initializer), initializer=_param_initializer),
bias_attr=fluid.ParamAttr(name=scope_name+'mask_lm_trans_fc.b_0')) bias_attr=fluid.ParamAttr(name=scope_name+'mask_lm_trans_fc.b_0'))
# transform: layer norm # transform: layer norm
mask_trans_feat = pre_process_layer( mask_trans_feat = pre_process_layer(
mask_trans_feat, 'n', name=scope_name+'mask_lm_trans') mask_trans_feat, 'n', name=scope_name+'mask_lm_trans')
......
...@@ -5,6 +5,7 @@ from paddlepalm.distribute import gpu_dev_count, cpu_dev_count ...@@ -5,6 +5,7 @@ from paddlepalm.distribute import gpu_dev_count, cpu_dev_count
from paddlepalm import Trainer from paddlepalm import Trainer
from paddlepalm.utils import reader_helper from paddlepalm.utils import reader_helper
import numpy as np import numpy as np
from paddlepalm.distribute import gpu_dev_count, data_feeder, decode_fake
import time import time
dev_count = 1 if gpu_dev_count <= 1 else gpu_dev_count dev_count = 1 if gpu_dev_count <= 1 else gpu_dev_count
...@@ -51,11 +52,12 @@ class MultiHeadTrainer(Trainer): ...@@ -51,11 +52,12 @@ class MultiHeadTrainer(Trainer):
'input_varnames': 'self._pred_input_varname_list', 'input_varnames': 'self._pred_input_varname_list',
'fetch_list': 'self._pred_fetch_name_list'} 'fetch_list': 'self._pred_fetch_name_list'}
self._check_save = lambda: False # self._check_save = lambda: False
for t in self._trainers: for t in self._trainers:
t._set_multitask() t._set_multitask()
def build_forward(self, backbone, heads): # def build_forward(self, backbone, heads):
def build_forward(self):
""" """
Build forward computation graph for training, which usually built from input layer to loss node. Build forward computation graph for training, which usually built from input layer to loss node.
...@@ -66,20 +68,13 @@ class MultiHeadTrainer(Trainer): ...@@ -66,20 +68,13 @@ class MultiHeadTrainer(Trainer):
Return: Return:
- loss_var: a Variable object. The computational graph variable(node) of loss. - loss_var: a Variable object. The computational graph variable(node) of loss.
""" """
head_dict = {}
if isinstance(heads, list): backbone = self._trainers[0]._backbone
head_dict = {k.name: v for k,v in zip(self._trainers, heads)} for i in self._trainers:
elif isinstance(heads, dict): assert i._task_head is not None and i._backbone is not None, "You should build forward for the {} task".format(i._name)
head_dict = heads assert i._backbone == backbone, "The backbone for each task must be the same"
else: head_dict[i._name] = i._task_head
raise ValueError()
num_heads = len(self._trainers)
assert len(head_dict) == num_heads
for t in self._trainers:
assert t.name in head_dict, "expected: {}, exists: {}".format(t.name, head_dict.keys())
train_prog = fluid.Program() train_prog = fluid.Program()
train_init_prog = fluid.Program() train_init_prog = fluid.Program()
self._train_prog = train_prog self._train_prog = train_prog
...@@ -87,27 +82,13 @@ class MultiHeadTrainer(Trainer): ...@@ -87,27 +82,13 @@ class MultiHeadTrainer(Trainer):
def get_loss(i): def get_loss(i):
head = head_dict[self._trainers[i].name] head = head_dict[self._trainers[i].name]
# loss_var = self._trainers[i].build_forward(backbone, head, train_prog, train_init_prog)
loss_var = self._trainers[i].build_forward(backbone, head) loss_var = self._trainers[i].build_forward(backbone, head)
return loss_var return loss_var
# task_fns = {} task_fns = {i: lambda i=i: get_loss(i) for i in range(len(self._trainers))}
# for i in range(num_heads):
# def task_loss():
# task_id = i
# return lambda: get_loss(task_id)
# task_fns[i] = task_loss()
# task_fns = {i: lambda: get_loss(i) for i in range(num_heads)}
task_fns = {i: lambda i=i: get_loss(i) for i in range(num_heads)}
with fluid.program_guard(train_prog, train_init_prog): with fluid.program_guard(train_prog, train_init_prog):
task_id_var = fluid.data(name="__task_id",shape=[1],dtype='int64') task_id_var = fluid.data(name="__task_id",shape=[1],dtype='int64')
# task_id_var = fluid.layers.fill_constant(shape=[1],dtype='int64', value=1)
# print(task_id_var.name)
loss_var = layers.switch_case( loss_var = layers.switch_case(
branch_index=task_id_var, branch_index=task_id_var,
...@@ -200,15 +181,15 @@ class MultiHeadTrainer(Trainer): ...@@ -200,15 +181,15 @@ class MultiHeadTrainer(Trainer):
feed_batch_process_fn = reader_helper.create_feed_batch_process_fn(net_inputs) feed_batch_process_fn = reader_helper.create_feed_batch_process_fn(net_inputs)
if gpu_dev_count > 1: if gpu_dev_count > 1:
distribute_feeder_fn = data_feeder(iterator_fn, feed_batch_process_fn) distribute_feeder_fn = data_feeder(iterator_fn, feed_batch_process_fn, phase=phase, is_multi=True)
else: else:
distribute_feeder_fn = iterator_fn distribute_feeder_fn = iterator_fn()
if phase == 'train': if phase == 'train':
self._train_reader = distribute_feeder_fn() self._train_reader = distribute_feeder_fn
self._feed_batch_process_fn = feed_batch_process_fn self._feed_batch_process_fn = feed_batch_process_fn
elif phase == 'predict': elif phase == 'predict':
self._predict_reader = distribute_feeder_fn() self._predict_reader = distribute_feeder_fn
self._pred_feed_batch_process_fn = feed_batch_process_fn self._pred_feed_batch_process_fn = feed_batch_process_fn
def _check_finish(self, task_name, silent=False): def _check_finish(self, task_name, silent=False):
...@@ -241,7 +222,6 @@ class MultiHeadTrainer(Trainer): ...@@ -241,7 +222,6 @@ class MultiHeadTrainer(Trainer):
task_rt_outputs = {k[len(self._trainers[task_id].name+'.'):]: v for k,v in rt_outputs.items() if k.startswith(self._trainers[task_id].name+'.')} task_rt_outputs = {k[len(self._trainers[task_id].name+'.'):]: v for k,v in rt_outputs.items() if k.startswith(self._trainers[task_id].name+'.')}
self._trainers[task_id]._task_head.batch_postprocess(task_rt_outputs) self._trainers[task_id]._task_head.batch_postprocess(task_rt_outputs)
if print_steps > 0 and self._cur_train_step % print_steps == 0: if print_steps > 0 and self._cur_train_step % print_steps == 0:
loss = rt_outputs[self._trainers[task_id].name+'.loss'] loss = rt_outputs[self._trainers[task_id].name+'.loss']
loss = np.mean(np.squeeze(loss)).tolist() loss = np.mean(np.squeeze(loss)).tolist()
...@@ -256,7 +236,7 @@ class MultiHeadTrainer(Trainer): ...@@ -256,7 +236,7 @@ class MultiHeadTrainer(Trainer):
loss, print_steps / time_cost)) loss, print_steps / time_cost))
time_begin = time.time() time_begin = time.time()
self._check_save() # self._check_save()
finish = self._check_finish(self._trainers[task_id].name) finish = self._check_finish(self._trainers[task_id].name)
if finish: if finish:
break break
...@@ -276,8 +256,8 @@ class MultiHeadTrainer(Trainer): ...@@ -276,8 +256,8 @@ class MultiHeadTrainer(Trainer):
def train_one_step(self, batch): def train_one_step(self, batch):
if dev_count > 1: if dev_count > 1:
assert isinstance(batch, list) assert isinstance(batch, tuple)
task_id = batch[0]['__task_id'][0] task_id = batch[0][0]['__task_id'][0]
else: else:
assert isinstance(batch, dict) assert isinstance(batch, dict)
task_id = batch['__task_id'][0] task_id = batch['__task_id'][0]
...@@ -286,7 +266,7 @@ class MultiHeadTrainer(Trainer): ...@@ -286,7 +266,7 @@ class MultiHeadTrainer(Trainer):
rt_outputs = self._trainers[task_id].train_one_step(batch) rt_outputs = self._trainers[task_id].train_one_step(batch)
self._cur_train_step += 1 self._cur_train_step += 1
self._check_save() # self._check_save()
return rt_outputs, task_id return rt_outputs, task_id
# if dev_count > 1: # if dev_count > 1:
......
...@@ -34,7 +34,6 @@ class MaskLMReader(Reader): ...@@ -34,7 +34,6 @@ class MaskLMReader(Reader):
for_cn = lang.lower() == 'cn' or lang.lower() == 'chinese' for_cn = lang.lower() == 'cn' or lang.lower() == 'chinese'
self._register.add('token_ids')
self._register.add('mask_pos') self._register.add('mask_pos')
if phase == 'train': if phase == 'train':
self._register.add('mask_label') self._register.add('mask_label')
......
...@@ -46,7 +46,7 @@ class Trainer(object): ...@@ -46,7 +46,7 @@ class Trainer(object):
self._pred_reader = None self._pred_reader = None
self._task_head = None self._task_head = None
self._pred_head = None self._pred_head = None
self._train_reader = None self._train_reader = None
self._predict_reader = None self._predict_reader = None
self._train_iterator = None self._train_iterator = None
...@@ -54,6 +54,8 @@ class Trainer(object): ...@@ -54,6 +54,8 @@ class Trainer(object):
self._train_init = False self._train_init = False
self._predict_init = False self._predict_init = False
self._train_init_prog = None
self._pred_init_prog = None
self._check_save = lambda: False self._check_save = lambda: False
...@@ -415,7 +417,7 @@ class Trainer(object): ...@@ -415,7 +417,7 @@ class Trainer(object):
self._raw_iterator_fn = iterator_fn self._raw_iterator_fn = iterator_fn
feed_batch_process_fn = reader_helper.create_feed_batch_process_fn(net_inputs) feed_batch_process_fn = reader_helper.create_feed_batch_process_fn(net_inputs)
if gpu_dev_count > 1: if gpu_dev_count > 1:
distribute_feeder_fn = data_feeder(iterator_fn, feed_batch_process_fn) distribute_feeder_fn = data_feeder(iterator_fn, feed_batch_process_fn, phase=phase)
else: else:
distribute_feeder_fn = iterator_fn() distribute_feeder_fn = iterator_fn()
...@@ -427,6 +429,7 @@ class Trainer(object): ...@@ -427,6 +429,7 @@ class Trainer(object):
self._pred_feed_batch_process_fn = feed_batch_process_fn self._pred_feed_batch_process_fn = feed_batch_process_fn
# return distribute_feeder_fn() # return distribute_feeder_fn()
def load_ckpt(self, model_path): def load_ckpt(self, model_path):
""" """
load training checkpoint for further training or predicting. load training checkpoint for further training or predicting.
...@@ -465,7 +468,7 @@ class Trainer(object): ...@@ -465,7 +468,7 @@ class Trainer(object):
strict=True) strict=True)
else: else:
raise Exception("model not found. You should at least build_forward or build_predict_forward to load its checkpoint.") raise Exception("model not found. You should at least build_forward or build_predict_forward to load its checkpoint.")
def load_predict_model(self, model_path, convert=False): def load_predict_model(self, model_path, convert=False):
""" """
load pretrain models(backbone) for training. load pretrain models(backbone) for training.
...@@ -500,7 +503,7 @@ class Trainer(object): ...@@ -500,7 +503,7 @@ class Trainer(object):
convert=convert, convert=convert,
main_program=self._train_init_prog) main_program=self._train_init_prog)
def set_saver(self, save_path, save_steps, save_type='ckpt'): def set_saver(self, save_path, save_steps, save_type='ckpt', is_multi=False):
""" """
create a build-in saver into trainer. A saver will automatically save checkpoint or predict model every `save_steps` training steps. create a build-in saver into trainer. A saver will automatically save checkpoint or predict model every `save_steps` training steps.
...@@ -510,6 +513,7 @@ class Trainer(object): ...@@ -510,6 +513,7 @@ class Trainer(object):
save_type: a string. The type of saved model. Currently support checkpoint(ckpt) and predict model(predict), default is ckpt. If both two types are needed to save, you can set as "ckpt,predict". save_type: a string. The type of saved model. Currently support checkpoint(ckpt) and predict model(predict), default is ckpt. If both two types are needed to save, you can set as "ckpt,predict".
""" """
save_type = save_type.split(',') save_type = save_type.split(',')
if 'predict' in save_type: if 'predict' in save_type:
...@@ -534,12 +538,21 @@ class Trainer(object): ...@@ -534,12 +538,21 @@ class Trainer(object):
def temp_func(): def temp_func():
if (self._save_predict or self._save_ckpt) and self._cur_train_step % save_steps == 0: if (self._save_predict or self._save_ckpt) and self._cur_train_step % save_steps == 0:
if self._save_predict: if self._save_predict:
self._save(save_path, suffix='pred.step'+str(self._cur_train_step)) if is_multi:
print('predict model has been saved at '+os.path.join(save_path, 'pred.step'+str(self._cur_train_step))) self._save(save_path, suffix='-pred.step'+str(self._cur_train_step))
print('predict model has been saved at '+os.path.join(save_path, 'pred.step'+str(self._cur_train_step)))
else:
self._save(save_path, suffix='pred.step'+str(self._cur_train_step))
print('predict model has been saved at '+os.path.join(save_path, 'pred.step'+str(self._cur_train_step)))
if self._save_ckpt: if self._save_ckpt:
fluid.io.save_persistables(self._exe, os.path.join(save_path, 'ckpt.step'+str(self._cur_train_step)), self._train_prog) if is_multi:
print('checkpoint has been saved at '+os.path.join(save_path, 'ckpt.step'+str(self._cur_train_step))) fluid.io.save_persistables(self._exe, os.path.join(save_path, 'ckpt.step'+str(self._cur_train_step)), self._train_prog)
print('checkpoint has been saved at '+os.path.join(save_path, 'ckpt.step'+str(self._cur_train_step)))
else:
fluid.io.save_persistables(self._exe, os.path.join(save_path, 'ckpt.step'+str(self._cur_train_step)), self._train_prog)
print('checkpoint has been saved at '+os.path.join(save_path, 'ckpt.step'+str(self._cur_train_step)))
return True return True
else: else:
return False return False
...@@ -600,7 +613,7 @@ class Trainer(object): ...@@ -600,7 +613,7 @@ class Trainer(object):
(self._cur_train_step-1) % self._steps_pur_epoch + 1 , self._steps_pur_epoch, self._cur_train_epoch, (self._cur_train_step-1) % self._steps_pur_epoch + 1 , self._steps_pur_epoch, self._cur_train_epoch,
loss, print_steps / time_cost)) loss, print_steps / time_cost))
time_begin = time.time() time_begin = time.time()
self._check_save() # self._check_save()
# if cur_task.train_finish and cur_task.cur_train_step + cur_task.cur_train_epoch * cur_task.steps_pur_epoch == cur_task.expected_train_steps: # if cur_task.train_finish and cur_task.cur_train_step + cur_task.cur_train_epoch * cur_task.steps_pur_epoch == cur_task.expected_train_steps:
# print(cur_task.name+': train finished!') # print(cur_task.name+': train finished!')
# cur_task.save() # cur_task.save()
...@@ -718,15 +731,16 @@ class Trainer(object): ...@@ -718,15 +731,16 @@ class Trainer(object):
feed, mask = batch feed, mask = batch
rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list) rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list)
num_fakes = decode_fake(len(rt_outputs[0]), mask, self._train_batch_size) num_fakes = decode_fake(len(rt_outputs[0]), mask, self._train_batch_size)
for _ in range(num_fakes): if num_fakes:
for item in rt_outputs: rt_outputs = [i[:-num_fakes] for i in rt_outputs]
item.pop()
else: else:
feed = self._feed_batch_process_fn(batch) feed = self._feed_batch_process_fn(batch)
rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list) rt_outputs = exe.run(distribute_train_prog, feed=feed, fetch_list=fetch_list)
rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)} rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)}
self._cur_train_step += 1 self._cur_train_step += 1
self._check_save()
self._cur_train_epoch = (self._cur_train_step-1) // self._steps_pur_epoch self._cur_train_epoch = (self._cur_train_step-1) // self._steps_pur_epoch
return rt_outputs return rt_outputs
...@@ -735,9 +749,8 @@ class Trainer(object): ...@@ -735,9 +749,8 @@ class Trainer(object):
feed, mask = batch feed, mask = batch
rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._pred_fetch_list) rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._pred_fetch_list)
num_fakes = decode_fake(len(rt_outputs[0]), mask, self._predict_batch_size) num_fakes = decode_fake(len(rt_outputs[0]), mask, self._predict_batch_size)
for _ in range(num_fakes): if num_fakes:
for item in rt_outputs: rt_outputs = [i[:-num_fakes] for i in rt_outputs]
item.pop()
else: else:
feed = self._pred_feed_batch_process_fn(batch) feed = self._pred_feed_batch_process_fn(batch)
rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._pred_fetch_list) rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._pred_fetch_list)
...@@ -750,7 +763,7 @@ class Trainer(object): ...@@ -750,7 +763,7 @@ class Trainer(object):
@property @property
def name(self): def name(self):
return self._name return self._name
@property @property
def num_examples(self): def num_examples(self):
return self._num_examples return self._num_examples
......
...@@ -21,13 +21,20 @@ import numpy as np ...@@ -21,13 +21,20 @@ import numpy as np
import paddle import paddle
from paddle import fluid from paddle import fluid
from paddle.fluid import layers from paddle.fluid import layers
from paddlepalm.distribute import gpu_dev_count, cpu_dev_count
dev_count = 1 if gpu_dev_count <= 1 else gpu_dev_count
def create_feed_batch_process_fn(net_inputs): def create_feed_batch_process_fn(net_inputs):
def feed_batch_process_fn(data): def feed_batch_process_fn(data, id=-1, phase='train', is_multi=False):
temp = {} temp = {}
for q, var in net_inputs.items(): if dev_count > 1 and phase=='train' and is_multi:
inputs = net_inputs[id]
else:
inputs= net_inputs
for q, var in inputs.items():
if isinstance(var, str) or isinstance(var, unicode): if isinstance(var, str) or isinstance(var, unicode):
temp[var] = data[q] temp[var] = data[q]
else: else:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册