diff --git a/modules/text/language_model/rbt3/README.md b/modules/text/language_model/rbt3/README.md
index 560f78e26f9ac09c8bd937115f8e147a17c24492..641b5089be82462ba3c4c2a0bba3973b8d17cb0f 100644
--- a/modules/text/language_model/rbt3/README.md
+++ b/modules/text/language_model/rbt3/README.md
@@ -1,5 +1,5 @@
 ```shell
-$ hub install rtb3==2.0.1
+$ hub install rtb3==2.0.2
 ```


@@ -85,7 +85,7 @@ label_map = {0: 'negative', 1: 'positive'}
 
 model = hub.Module(
     name='rtb3',
-    version='2.0.1',
+    version='2.0.2',
     task='seq-cls',
     load_checkpoint='/path/to/parameters',
     label_map=label_map)
@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
 * 2.0.1
 
   增加文本匹配任务`text-matching`
+
+* 2.0.2
+
+  更新预训练模型调用方法
diff --git a/modules/text/language_model/rbt3/module.py b/modules/text/language_model/rbt3/module.py
index 1fdde350ae46489b99f3072a816d35f630fee042..6ef8b7e030380f9a0e041a7e5bcc3226e58d0588 100644
--- a/modules/text/language_model/rbt3/module.py
+++ b/modules/text/language_model/rbt3/module.py
@@ -11,17 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict
-import os
 import math
+import os
+from typing import Dict
 
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-
-from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel
-from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer
 from paddlenlp.metrics import ChunkEvaluator
+from paddlenlp.transformers import AutoModel
+from paddlenlp.transformers import AutoModelForSequenceClassification
+from paddlenlp.transformers import AutoModelForTokenClassification
+from paddlenlp.transformers import AutoTokenizer
+
 from paddlehub.module.module import moduleinfo
 from paddlehub.module.nlp_module import TransformerModule
 from paddlehub.utils.log import logger
@@ -29,7 +31,7 @@ from paddlehub.utils.log import logger
 
 @moduleinfo(
     name="rbt3",
-    version="2.0.1",
+    version="2.0.2",
     summary="rbt3, 3-layer, 768-hidden, 12-heads, 38M parameters ",
     author="ymcui",
     author_email="ymcui@ir.hit.edu.cn",
@@ -42,13 +44,13 @@ class Roberta(nn.Layer):
     """
 
     def __init__(
-            self,
-            task: str = None,
-            load_checkpoint: str = None,
-            label_map: Dict = None,
-            num_classes: int = 2,
-            suffix: bool = False,
-            **kwargs,
+        self,
+        task: str = None,
+        load_checkpoint: str = None,
+        label_map: Dict = None,
+        num_classes: int = 2,
+        suffix: bool = False,
+        **kwargs,
     ):
         super(Roberta, self).__init__()
         if label_map:
@@ -63,23 +65,26 @@ class Roberta(nn.Layer):
                 "current task name 'sequence_classification' was renamed to 'seq-cls', "
                 "'sequence_classification' has been deprecated and will be removed in the future.", )
         if task == 'seq-cls':
-            self.model = RobertaForSequenceClassification.from_pretrained(
-                pretrained_model_name_or_path='rbt3', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbt3',
+                                                                            num_classes=self.num_classes,
+                                                                            **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task == 'token-cls':
-            self.model = RobertaForTokenClassification.from_pretrained(
-                pretrained_model_name_or_path='rbt3', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForTokenClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbt3',
+                                                                         num_classes=self.num_classes,
+                                                                         **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
-            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
+            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())],
+                                         suffix=suffix)
         elif task == 'text-matching':
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', **kwargs)
             self.dropout = paddle.nn.Dropout(0.1)
             self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task is None:
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', **kwargs)
         else:
             raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported))
 
@@ -171,4 +176,4 @@ class Roberta(nn.Layer):
         """
         Gets the tokenizer that is customized for this module.
         """
-        return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='rbt3', *args, **kwargs)
+        return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/rbt3', *args, **kwargs)
diff --git a/modules/text/language_model/rbtl3/README.md b/modules/text/language_model/rbtl3/README.md
index c61df18d216efc47c2fd5f3dbb990722a398b0e1..8bcda290548e20d6b7d2bf99ff294514ba36f634 100644
--- a/modules/text/language_model/rbtl3/README.md
+++ b/modules/text/language_model/rbtl3/README.md
@@ -1,5 +1,5 @@
 ```shell
-$ hub install rbtl3==2.0.1
+$ hub install rbtl3==2.0.2
 ```


@@ -85,7 +85,7 @@ label_map = {0: 'negative', 1: 'positive'}
 
 model = hub.Module(
     name='rbtl3',
-    version='2.0.1',
+    version='2.0.2',
     task='seq-cls',
     load_checkpoint='/path/to/parameters',
     label_map=label_map)
@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
 * 2.0.1
 
   增加文本匹配任务`text-matching`
+
+* 2.0.2
+
+  更新预训练模型调用方法
diff --git a/modules/text/language_model/rbtl3/module.py b/modules/text/language_model/rbtl3/module.py
index d5789099dc1c445ab084f477c4f29d59d0716121..bab919f10a292d3d7415cc922a4ad7df60f19da1 100644
--- a/modules/text/language_model/rbtl3/module.py
+++ b/modules/text/language_model/rbtl3/module.py
@@ -11,17 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict
-import os
 import math
+import os
+from typing import Dict
 
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-
-from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel
-from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer
 from paddlenlp.metrics import ChunkEvaluator
+from paddlenlp.transformers import AutoModel
+from paddlenlp.transformers import AutoModelForSequenceClassification
+from paddlenlp.transformers import AutoModelForTokenClassification
+from paddlenlp.transformers import AutoTokenizer
+
 from paddlehub.module.module import moduleinfo
 from paddlehub.module.nlp_module import TransformerModule
 from paddlehub.utils.log import logger
@@ -29,7 +31,7 @@ from paddlehub.utils.log import logger
 
 @moduleinfo(
     name="rbtl3",
-    version="2.0.1",
+    version="2.0.2",
     summary="rbtl3, 3-layer, 1024-hidden, 16-heads, 61M parameters ",
     author="ymcui",
     author_email="ymcui@ir.hit.edu.cn",
@@ -42,13 +44,13 @@ class Roberta(nn.Layer):
     """
 
     def __init__(
-            self,
-            task: str = None,
-            load_checkpoint: str = None,
-            label_map: Dict = None,
-            num_classes: int = 2,
-            suffix: bool = False,
-            **kwargs,
+        self,
+        task: str = None,
+        load_checkpoint: str = None,
+        label_map: Dict = None,
+        num_classes: int = 2,
+        suffix: bool = False,
+        **kwargs,
     ):
         super(Roberta, self).__init__()
         if label_map:
@@ -63,23 +65,26 @@ class Roberta(nn.Layer):
                 "current task name 'sequence_classification' was renamed to 'seq-cls', "
                 "'sequence_classification' has been deprecated and will be removed in the future.", )
         if task == 'seq-cls':
-            self.model = RobertaForSequenceClassification.from_pretrained(
-                pretrained_model_name_or_path='rbtl3', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForSequenceClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3',
+                                                                            num_classes=self.num_classes,
+                                                                            **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task == 'token-cls':
-            self.model = RobertaForTokenClassification.from_pretrained(
-                pretrained_model_name_or_path='rbtl3', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForTokenClassification.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3',
+                                                                         num_classes=self.num_classes,
+                                                                         **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
-            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
+            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())],
+                                         suffix=suffix)
         elif task == 'text-matching':
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', **kwargs)
             self.dropout = paddle.nn.Dropout(0.1)
             self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task is None:
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', **kwargs)
         else:
             raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported))
 
@@ -171,4 +176,4 @@ class Roberta(nn.Layer):
         """
         Gets the tokenizer that is customized for this module.
         """
-        return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='rbtl3', *args, **kwargs)
+        return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/rbtl3', *args, **kwargs)
diff --git a/modules/text/language_model/roberta-wwm-ext-large/README.md b/modules/text/language_model/roberta-wwm-ext-large/README.md
index d5c5aa59294c98530a2c0cd625713dee5d4963c8..a08e62d7f8dd443396725b7f8028bad1da2342ef 100644
--- a/modules/text/language_model/roberta-wwm-ext-large/README.md
+++ b/modules/text/language_model/roberta-wwm-ext-large/README.md
@@ -1,6 +1,6 @@
 # roberta-wwm-ext-large
 |模型名称|roberta-wwm-ext-large|
-| :--- | :---: | 
+| :--- | :---: |
 |类别|文本-语义模型|
 |网络|roberta-wwm-ext-large|
 |数据集|百度自建数据集|
@@ -51,7 +51,7 @@ label_map = {0: 'negative', 1: 'positive'}
 
 model = hub.Module(
     name='roberta-wwm-ext-large',
-    version='2.0.2',
+    version='2.0.3',
     task='seq-cls',
     load_checkpoint='/path/to/parameters',
     label_map=label_map)
@@ -181,6 +181,10 @@ for idx, text in enumerate(data):
 * 2.0.2
 
   增加文本匹配任务`text-matching`
+
+* 2.0.3
+
+  更新预训练模型调用方法
 ```shell
-  $ hub install roberta-wwm-ext-large==2.0.2
+  $ hub install roberta-wwm-ext-large==2.0.3
 ```
diff --git a/modules/text/language_model/roberta-wwm-ext-large/module.py b/modules/text/language_model/roberta-wwm-ext-large/module.py
index 13efb6aea4d8f1a8d0fdda5c17cceb4347cbaf14..272df442528f202f473afe20676cc5dec036e26b 100644
--- a/modules/text/language_model/roberta-wwm-ext-large/module.py
+++ b/modules/text/language_model/roberta-wwm-ext-large/module.py
@@ -11,17 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict
-import os
 import math
+import os
+from typing import Dict
 
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-
-from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel
-from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer
 from paddlenlp.metrics import ChunkEvaluator
+from paddlenlp.transformers import AutoModel
+from paddlenlp.transformers import AutoModelForSequenceClassification
+from paddlenlp.transformers import AutoModelForTokenClassification
+from paddlenlp.transformers import AutoTokenizer
+
 from paddlehub.module.module import moduleinfo
 from paddlehub.module.nlp_module import TransformerModule
 from paddlehub.utils.log import logger
@@ -29,7 +31,7 @@ from paddlehub.utils.log import logger
 
 @moduleinfo(
     name="roberta-wwm-ext-large",
-    version="2.0.2",
+    version="2.0.3",
     summary=
     "chinese-roberta-wwm-ext-large, 24-layer, 1024-hidden, 16-heads, 340M parameters. The module is executed as paddle.dygraph.",
     author="ymcui",
@@ -43,13 +45,13 @@ class Roberta(nn.Layer):
     """
 
     def __init__(
-            self,
-            task: str = None,
-            load_checkpoint: str = None,
-            label_map: Dict = None,
-            num_classes: int = 2,
-            suffix: bool = False,
-            **kwargs,
+        self,
+        task: str = None,
+        load_checkpoint: str = None,
+        label_map: Dict = None,
+        num_classes: int = 2,
+        suffix: bool = False,
+        **kwargs,
     ):
         super(Roberta, self).__init__()
         if label_map:
@@ -64,23 +66,24 @@ class Roberta(nn.Layer):
                 "current task name 'sequence_classification' was renamed to 'seq-cls', "
                 "'sequence_classification' has been deprecated and will be removed in the future.", )
         if task == 'seq-cls':
-            self.model = RobertaForSequenceClassification.from_pretrained(
-                pretrained_model_name_or_path='roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForSequenceClassification.from_pretrained(
+                pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task == 'token-cls':
-            self.model = RobertaForTokenClassification.from_pretrained(
-                pretrained_model_name_or_path='roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForTokenClassification.from_pretrained(
+                pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
-            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
+            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())],
+                                         suffix=suffix)
         elif task == 'text-matching':
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', **kwargs)
             self.dropout = paddle.nn.Dropout(0.1)
             self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task is None:
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', **kwargs)
         else:
             raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported))
 
@@ -172,4 +175,4 @@ class Roberta(nn.Layer):
         """
         Gets the tokenizer that is customized for this module.
         """
-        return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', *args, **kwargs)
+        return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext-large', *args, **kwargs)
diff --git a/modules/text/language_model/roberta-wwm-ext/README.md b/modules/text/language_model/roberta-wwm-ext/README.md
index f052628f29153809fdcf94ae93480ff2a618499a..7bb50223702d8807d25995f09237ee16d3f07ac4 100644
--- a/modules/text/language_model/roberta-wwm-ext/README.md
+++ b/modules/text/language_model/roberta-wwm-ext/README.md
@@ -1,6 +1,6 @@
 # roberta-wwm-ext
 |模型名称|roberta-wwm-ext|
-| :--- | :---: | 
+| :--- | :---: |
 |类别|文本-语义模型|
 |网络|roberta-wwm-ext|
 |数据集|百度自建数据集|
@@ -51,7 +51,7 @@ label_map = {0: 'negative', 1: 'positive'}
 
 model = hub.Module(
     name='roberta-wwm-ext',
-    version='2.0.2',
+    version='2.0.3',
     task='seq-cls',
     load_checkpoint='/path/to/parameters',
     label_map=label_map)
@@ -181,6 +181,10 @@ for idx, text in enumerate(data):
 * 2.0.2
 
   增加文本匹配任务`text-matching`
+
+* 2.0.3
+
+  更新预训练模型调用方法
 ```shell
-  $ hub install roberta-wwm-ext==2.0.2
+  $ hub install roberta-wwm-ext==2.0.3
 ```
diff --git a/modules/text/language_model/roberta-wwm-ext/module.py b/modules/text/language_model/roberta-wwm-ext/module.py
index 66108a239e8d0acc2e3da9b6595e6e15337e6044..2fe144315a05be9b4ef9e69222e5f1678518f9c7 100644
--- a/modules/text/language_model/roberta-wwm-ext/module.py
+++ b/modules/text/language_model/roberta-wwm-ext/module.py
@@ -11,17 +11,19 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Dict
-import os
 import math
+import os
+from typing import Dict
 
 import paddle
 import paddle.nn as nn
 import paddle.nn.functional as F
-
-from paddlenlp.transformers.roberta.modeling import RobertaForSequenceClassification, RobertaForTokenClassification, RobertaModel
-from paddlenlp.transformers.roberta.tokenizer import RobertaTokenizer
 from paddlenlp.metrics import ChunkEvaluator
+from paddlenlp.transformers import AutoModel
+from paddlenlp.transformers import AutoModelForSequenceClassification
+from paddlenlp.transformers import AutoModelForTokenClassification
+from paddlenlp.transformers import AutoTokenizer
+
 from paddlehub.module.module import moduleinfo
 from paddlehub.module.nlp_module import TransformerModule
 from paddlehub.utils.log import logger
@@ -29,7 +31,7 @@ from paddlehub.utils.log import logger
 
 @moduleinfo(
     name="roberta-wwm-ext",
-    version="2.0.2",
+    version="2.0.3",
     summary=
     "chinese-roberta-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
     author="ymcui",
@@ -43,13 +45,13 @@ class Roberta(nn.Layer):
     """
 
     def __init__(
-            self,
-            task: str = None,
-            load_checkpoint: str = None,
-            label_map: Dict = None,
-            num_classes: int = 2,
-            suffix: bool = False,
-            **kwargs,
+        self,
+        task: str = None,
+        load_checkpoint: str = None,
+        label_map: Dict = None,
+        num_classes: int = 2,
+        suffix: bool = False,
+        **kwargs,
     ):
         super(Roberta, self).__init__()
         if label_map:
@@ -64,23 +66,24 @@ class Roberta(nn.Layer):
                 "current task name 'sequence_classification' was renamed to 'seq-cls', "
                 "'sequence_classification' has been deprecated and will be removed in the future.", )
         if task == 'seq-cls':
-            self.model = RobertaForSequenceClassification.from_pretrained(
-                pretrained_model_name_or_path='roberta-wwm-ext', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForSequenceClassification.from_pretrained(
+                pretrained_model_name_or_path='hfl/roberta-wwm-ext', num_classes=self.num_classes, **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task == 'token-cls':
-            self.model = RobertaForTokenClassification.from_pretrained(
-                pretrained_model_name_or_path='roberta-wwm-ext', num_classes=self.num_classes, **kwargs)
+            self.model = AutoModelForTokenClassification.from_pretrained(
+                pretrained_model_name_or_path='hfl/roberta-wwm-ext', num_classes=self.num_classes, **kwargs)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
-            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
+            self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())],
+                                         suffix=suffix)
         elif task == 'text-matching':
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext', **kwargs)
             self.dropout = paddle.nn.Dropout(0.1)
             self.classifier = paddle.nn.Linear(self.model.config['hidden_size'] * 3, 2)
             self.criterion = paddle.nn.loss.CrossEntropyLoss()
             self.metric = paddle.metric.Accuracy()
         elif task is None:
-            self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs)
+            self.model = AutoModel.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext', **kwargs)
         else:
             raise RuntimeError("Unknown task {}, task should be one in {}".format(task, self._tasks_supported))
 
@@ -172,4 +175,4 @@ class Roberta(nn.Layer):
         """
        Gets the tokenizer that is customized for this module.
         """
-        return RobertaTokenizer.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', *args, **kwargs)
+        return AutoTokenizer.from_pretrained(pretrained_model_name_or_path='hfl/roberta-wwm-ext', *args, **kwargs)