From 65c1f0b99bec27c27b04f64bc0b4f4f8b5a46492 Mon Sep 17 00:00:00 2001
From: kinghuin
Date: Tue, 15 Dec 2020 21:28:52 +0800
Subject: [PATCH] Optimize the ernie gen docs (#5059)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add ernie_gen
* optimize ernie_gen
* optimize ernie_gen
* optimize ernie_gen code
* fix crf bug
* add ernie_gen __init__.py
* modify nlp version
* fix ernie_gen predict
* optimize doc
---
 PaddleNLP/docs/transformers.md                          | 4 ++--
 PaddleNLP/examples/lexical_analysis/train.py            | 2 +-
 PaddleNLP/examples/text_generation/ernie-gen/README.md  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/PaddleNLP/docs/transformers.md b/PaddleNLP/docs/transformers.md
index 91f34754..98a5b966 100644
--- a/PaddleNLP/docs/transformers.md
+++ b/PaddleNLP/docs/transformers.md
@@ -10,12 +10,12 @@
 | Model | Tokenizer| Supported Task| Pretrained Weight|
 |---|---|---|---|
 | [BERT](https://arxiv.org/abs/1810.04805) | BertTokenizer|BertModel<br>BertForQuestionAnswering<br>BertForSequenceClassification<br>BertForTokenClassification| `bert-base-uncased`<br>`bert-large-uncased`<br>`bert-base-multilingual-uncased`<br>`bert-base-cased`<br>`bert-base-chinese`<br>`bert-base-multilingual-cased`<br>`bert-large-cased`<br>`bert-wwm-chinese`<br>`bert-wwm-ext-chinese` |
-|[ERNIE](https://arxiv.org/abs/1904.09223)|ErnieTokenizer<br>ErnieTinyTokenizer|ErnieModel<br>ErnieForQuestionAnswering<br>ErnieForSequenceClassification<br>ErnieForTokenClassification| `ernie-1.0`<br>`ernie-tiny`<br>`ernie-2.0-en`<br>`ernie-2.0-large-en`|
+|[ERNIE](https://arxiv.org/abs/1904.09223)|ErnieTokenizer<br>ErnieTinyTokenizer|ErnieModel<br>ErnieForQuestionAnswering<br>ErnieForSequenceClassification<br>ErnieForTokenClassification<br>ErnieForGeneration| `ernie-1.0`<br>`ernie-tiny`<br>`ernie-2.0-en`<br>`ernie-2.0-large-en`<br>`ernie-gen-base-en`<br>`ernie-gen-large-en`<br>`ernie-gen-large-en-430g`|
 |[RoBERTa](https://arxiv.org/abs/1907.11692)|RobertaTokenizer| RobertaModel<br>RobertaForQuestionAnswering<br>RobertaForSequenceClassification<br>RobertaForTokenClassification| `roberta-wwm-ext`<br>`roberta-wwm-ext-large`<br>`rbt3`<br>`rbtl3`|
 |[ELECTRA](https://arxiv.org/abs/2003.10555) |ElectraTokenizer| ElectraModel<br>ElectraForSequenceClassification<br>ElectraForTokenClassification<br>|`electra-small`<br>`electra-base`<br>`electra-large`<br>`chinese-electra-small`<br>`chinese-electra-base`<br>|
 |[Transformer](https://arxiv.org/abs/1706.03762) |- | TransformerModel | - |
-Note: the Chinese pretrained models are `bert-base-chinese, bert-wwm-chinese, bert-wwm-ext-chinese, ernie-1.0, ernie-tiny, roberta-wwm-ext, roberta-wwm-ext-large, rbt3, rbtl3, chinese-electra-base, chinese-electra-small`.
+Note: the Chinese pretrained models are `bert-base-chinese, bert-wwm-chinese, bert-wwm-ext-chinese, ernie-1.0, ernie-tiny, roberta-wwm-ext, roberta-wwm-ext-large, rbt3, rbtl3, chinese-electra-base, chinese-electra-small`. The generation models `ernie-gen-base-en, ernie-gen-large-en, ernie-gen-large-en-430g` support only the `ErnieForGeneration` task.
 
 ## How to Use the Pretrained Models
 
diff --git a/PaddleNLP/examples/lexical_analysis/train.py b/PaddleNLP/examples/lexical_analysis/train.py
index 1bb08d5b..1adecda6 100644
--- a/PaddleNLP/examples/lexical_analysis/train.py
+++ b/PaddleNLP/examples/lexical_analysis/train.py
@@ -96,7 +96,7 @@ def train(args):
     crf_loss = LinearChainCrfLoss(network.crf.transitions)
     chunk_evaluator = ChunkEvaluator(
         int(math.ceil((train_dataset.num_labels + 1) / 2.0)),
-        "IOB")  # + 1 for SOS and EOS
+        "IOB")  # + 1 for START and STOP
     model.prepare(optimizer, crf_loss, chunk_evaluator)
     if args.init_checkpoint:
         model.load(args.init_checkpoint)
diff --git a/PaddleNLP/examples/text_generation/ernie-gen/README.md b/PaddleNLP/examples/text_generation/ernie-gen/README.md
index af31ffa4..8d910e5b 100644
--- a/PaddleNLP/examples/text_generation/ernie-gen/README.md
+++ b/PaddleNLP/examples/text_generation/ernie-gen/README.md
@@ -59,7 +59,7 @@ python -u ./train.py \
 ```
 
 The parameters are described as follows:
-- `model_name_or_path` indicates a model with a specific configuration, together with its pretrained weights and the tokenizer used during pretraining. If the model files are stored locally, the corresponding directory path can also be given here.
+- `model_name_or_path` indicates a model with a specific configuration, together with its pretrained weights and the tokenizer used during pretraining. All models in [PaddleNLP transformer-style pretrained models](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/docs/transformers.md) are supported, but only the three models `ernie-gen-base-en, ernie-gen-large-en, ernie-gen-large-en-430g` load the parameters of the final output layer; the other models load only the transformer parameters as a warm start. If the model files are stored locally, the corresponding directory path can also be given here.
 - `max_encode_len` is the maximum input sentence length; inputs longer than this are truncated.
 - `max_decode_len` is the maximum output sentence length; outputs longer than this are truncated.
 - `batch_size` is the number of samples **per card** in each iteration.
@@ -86,7 +86,7 @@ python -u ./eval.py \
 ```
 
 The parameters are described as follows:
-- `model_name_or_path` indicates a model with a specific configuration, together with its pretrained weights and the tokenizer used during pretraining. If the model files are stored locally, the corresponding directory path can also be given here.
+- `model_name_or_path` indicates a model with a specific configuration, together with its pretrained weights and the tokenizer used during pretraining. All models in [PaddleNLP transformer-style pretrained models](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/docs/transformers.md) are supported, but only the three models `ernie-gen-base-en, ernie-gen-large-en, ernie-gen-large-en-430g` load the parameters of the final output layer; the other models load only the transformer parameters as a warm start. If the model files are stored locally, the corresponding directory path can also be given here.
 - `max_encode_len` is the maximum input sentence length; inputs longer than this are truncated.
 - `max_decode_len` is the maximum output sentence length; outputs longer than this are truncated.
 - `batch_size` is the number of samples **per card** in each iteration.
--
GitLab
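
For readers trying out the newly documented `ErnieForGeneration`, here is a minimal sketch of the loading behavior the docs above describe. It assumes PaddleNLP's usual `from_pretrained` convention applies to this class and that `ErnieTokenizer` accepts the `ernie-gen-*` weight names, as their grouping in the table suggests:

```python
from paddlenlp.transformers import ErnieForGeneration, ErnieTokenizer

# The three ernie-gen weights ship with the final output (decoding) layer,
# so the full generation model is restored.
model = ErnieForGeneration.from_pretrained("ernie-gen-base-en")
tokenizer = ErnieTokenizer.from_pretrained("ernie-gen-base-en")

# Any other ERNIE weight can be passed as model_name_or_path, but per the
# README change above only the transformer (encoder) parameters are loaded
# as a warm start; the output layer starts from a fresh initialization and
# needs fine-tuning (e.g. with train.py) before it can generate useful text.
warm_started = ErnieForGeneration.from_pretrained("ernie-1.0")
```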