未验证 提交 7b52bcc9 编写于 作者: N nbcc 提交者: GitHub

Merge pull request #663 from zhanghan1992/develop

change the path of ernie-gram and fix mrc
......@@ -36,7 +36,7 @@ wget https://ernie-github.cdn.bcebos.com/data-xnli.tar.gz
tar xf data-xnli.tar.gz
cd ..
#demo for NLI task
sh ernie_gram/run_cls.sh ernie_gram/task_configs/xnli_conf
sh run_cls.sh task_configs/xnli_conf
```
### Setup
......
......@@ -38,7 +38,7 @@ wget https://ernie-github.cdn.bcebos.com/data-xnli.tar.gz
tar xf data-xnli.tar.gz
cd ..
#demo for NLI task
sh ernie_gram/run_cls.sh ernie_gram/task_configs/xnli_conf
sh run_cls.sh task_configs/xnli_conf
```
......
......@@ -23,6 +23,8 @@ from functools import reduce, partial
import numpy as np
import logging
#from visualdl import LogWriter
import sys
sys.path.append("../")
from pathlib import Path
import paddle as P
......@@ -32,8 +34,8 @@ import propeller.paddle as propeller
#from model.bert import BertConfig, BertModelLayer
from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification
from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
from ernie_gram.optimization import AdamW
from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
from optimization import AdamW
from utils import create_if_not_exists, get_warmup_and_linear_decay
log.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
......
......@@ -30,6 +30,8 @@ import pickle
import argparse
from functools import partial
from io import open
import sys
sys.path.append("../")
import numpy as np
import logging
......@@ -38,22 +40,23 @@ import paddle as P
from propeller import log
import propeller.paddle as propeller
from ernie_gram.optimization import AdamW
from optimization import AdamW
from ernie.modeling_ernie import ErnieModel, ErnieModelForQuestionAnswering
from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
#from ernie.optimization import AdamW, LinearDecay
from ernie_gram.mrc import mrc_reader
from ernie_gram.mrc import mrc_metrics
from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
from mrc import mrc_reader
from mrc import mrc_metrics
from utils import create_if_not_exists, get_warmup_and_linear_decay
log.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
def evaluate(model, ds, all_examples, all_features, tokenizer, args):
dev_file = json.loads(open(args.dev_file, encoding='utf8').read())
def evaluate(model, ds, all_examples, all_features, tokenizer, args, is_test=False):
dev_file = args.dev_file if not is_test else args.test_file
dev_file = json.loads(open(dev_file, encoding='utf8').read())
with P.no_grad():
log.debug('start eval')
model.eval()
......@@ -84,8 +87,8 @@ def evaluate(model, ds, all_examples, all_features, tokenizer, args):
return f1, em
def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
tokenizer, args):
def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
tokenizer, args, test_dataset=None, test_examples=None, test_features=None, do_test=False):
model = P.DataParallel(model)
max_steps = args.max_steps
......@@ -142,10 +145,14 @@ def train(model, train_dataset, dev_dataset, dev_examples, dev_features,
log.debug(msg)
if env.dev_id == 0 and step % 100==0 and step:
print(step)
f1, em = evaluate(model, dev_dataset, dev_examples,
dev_features, tokenizer, args)
log.debug('[step %d] eval result: f1 %.5f em %.5f' %
log.debug('[step %d] dev eval result: f1 %.5f em %.5f' %
(step, f1, em))
if do_test:
f1, em = evaluate(model, test_dataset, test_examples,
test_features, tokenizer, args, True)
log.debug('[step %d] test eval result: f1 %.5f em %.5f' %
(step, f1, em))
if env.dev_id == 0 and args.save_dir is not None:
P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
......@@ -177,7 +184,12 @@ if __name__ == "__main__":
type=str,
required=True,
help='data directory includes train / develop data')
parser.add_argument('--warmup_proportion', type=float, default=0.0)
# Optional test set: when supplied it is loaded like the dev file and
# evaluated in addition to it; leaving it unset keeps dev-only evaluation.
parser.add_argument(
    '--test_file',
    type=str,
    default=None,
    help='optional test file (json); if set, it is evaluated in addition to the dev file')
parser.add_argument('--warmup_proportion', type=float, default=0.1)
parser.add_argument('--lr', type=float, default=3e-5, help='learning rate')
parser.add_argument(
'--save_dir', type=Path, required=True, help='model output directory')
......@@ -216,6 +228,10 @@ if __name__ == "__main__":
dev_examples = mrc_reader.read_files(args.dev_file, is_training=False)
dev_features = mrc_reader.convert_example_to_features(
dev_examples, args.max_seqlen, tokenizer, is_training=False)
if args.test_file:
test_examples = mrc_reader.read_files(args.test_file, is_training=False)
test_features = mrc_reader.convert_example_to_features(
test_examples, args.max_seqlen, tokenizer, is_training=False)
log.info('train examples: %d, features: %d' %
(len(train_examples), len(train_features)))
......@@ -235,16 +251,28 @@ if __name__ == "__main__":
dev_dataset = propeller.data.Dataset.from_list(dev_features).map(
map_fn).padded_batch(args.bsz)
model = ErnieModelForQuestionAnswering.from_pretrained(
args.from_pretrained, name='')
train(model, train_dataset, dev_dataset, dev_examples, dev_features,
if args.test_file:
test_dataset = propeller.data.Dataset.from_list(test_features).map(
map_fn).padded_batch(args.bsz)
train(model, train_dataset, dev_dataset, dev_examples, dev_features,
tokenizer, args, test_dataset, test_examples, test_features, True)
else:
train(model, train_dataset, dev_dataset, dev_examples, dev_features,
tokenizer, args)
if env.dev_id == 0:
f1, em = evaluate(model, dev_dataset, dev_examples, dev_features,
tokenizer, args)
log.debug('final eval result: f1 %.5f em %.5f' % (f1, em))
log.debug('final dev eval result: f1 %.5f em %.5f' % (f1, em))
if args.test_file:
f1, em = evaluate(model, test_dataset, test_examples, test_features,
tokenizer, args, True)
log.debug('final test eval result: f1 %.5f em %.5f' % (f1, em))
if env.dev_id == 0 and args.save_dir is not None:
P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
......@@ -29,6 +29,8 @@ import numpy as np
import multiprocessing
import pickle
import logging
import sys
sys.path.append("../")
from sklearn.metrics import f1_score
import paddle as P
......@@ -39,10 +41,10 @@ import propeller.paddle as propeller
log.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)
from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
from utils import create_if_not_exists, get_warmup_and_linear_decay
from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification, ErnieModelForTokenClassification
from ernie.tokenizing_ernie import ErnieTokenizer
from ernie_gram.optimization import AdamW
from optimization import AdamW
parser = propeller.ArgumentParser('NER model with ERNIE')
parser.add_argument('--max_seqlen', type=int, default=256)
......
......@@ -349,7 +349,10 @@ def make_results(vocab, all_examples, all_features, all_results, n_best_size,
unique_id_to_result = {}
for result in all_results:
unique_id_to_result[result.unique_id] = result
try:
unique_id_to_result[result.unique_id] = result
except:
continue
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
"PrelimPrediction", [
......
......@@ -25,11 +25,13 @@ from paddle.fluid.framework import Variable, default_main_program
import numpy as np
import paddle as P
import paddle.distributed.fleet as fleet
import sys
sys.path.append("../")
from propeller.paddle.train.hooks import RunHook
import paddle.fluid as F
log = logging.getLogger(__name__)
from ernie_gram.utils import create_if_not_exists, get_warmup_and_linear_decay
from utils import create_if_not_exists, get_warmup_and_linear_decay
class AdamW(P.optimizer.AdamW):
"""AdamW object for dygraph"""
......
source $1
python3 -m paddle.distributed.launch ./ernie_gram/finetune_classifier_distributed.py \
python3 -m paddle.distributed.launch ./finetune_classifier_distributed.py \
--data_dir $data_dir \
--max_steps $max_steps \
--bsz $bsz \
......
source $1
export CUDA_VISIBLE_DEVICES=0
python3 -m paddle.distributed.launch ./ernie_gram/finetune_mrc.py \
python3 -m paddle.distributed.launch ./finetune_mrc.py \
--train_file $train_file \
--dev_file $dev_file \
--max_steps $max_steps \
......
source $1
python3 -m paddle.distributed.launch ./ernie_gram/finetune_ner.py \
python3 -m paddle.distributed.launch ./finetune_ner.py \
--data_dir $data_dir \
--max_steps $max_steps \
--epoch $epoch \
......
......@@ -68,12 +68,9 @@ def _fetch_from_remote(url,
f = done_file.open('wb')
f.close()
else:
while True:
if done_file.exists():
break
else:
time.sleep(1)
while not done_file.exists():
time.sleep(1)
log.debug('%s cached in %s' % (url, cached_dir))
return cached_dir_model
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册