diff --git a/modules/text/language_model/bert-base-cased/README.md b/modules/text/language_model/bert-base-cased/README.md index cd11fd8bb8823f16536f15504e4b90c03266f78e..4f5d7da36661a1cd59236e1fcb3b6d3abe2430bb 100644 --- a/modules/text/language_model/bert-base-cased/README.md +++ b/modules/text/language_model/bert-base-cased/README.md @@ -16,6 +16,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -28,6 +29,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/bert-base-cased/module.py b/modules/text/language_model/bert-base-cased/module.py index f5e2cc7cf30ede59b6cb3ca1968315b2946da0bd..9af1eae87d07ffb66f44c9b65d9885a34cfcf029 100644 --- a/modules/text/language_model/bert-base-cased/module.py +++ b/modules/text/language_model/bert-base-cased/module.py @@ -47,6 +47,7 @@ class Bert(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Bert, self).__init__() @@ -70,7 +71,7 @@ class Bert(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-base-cased', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-cased', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/bert-base-chinese/README.md b/modules/text/language_model/bert-base-chinese/README.md index 79007b083e75905fbd486968c5d0948c961fe256..67a83dd6dbc3f3bf2e2f29ed64f874b4776de417 100644 --- a/modules/text/language_model/bert-base-chinese/README.md +++ b/modules/text/language_model/bert-base-chinese/README.md @@ -16,6 +16,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -28,6 +29,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python def predict( diff --git a/modules/text/language_model/bert-base-chinese/module.py b/modules/text/language_model/bert-base-chinese/module.py index 13457527d045f6783dc4994e791b79a3bc6929c1..5cf2a758deca71590de021109830e0ec625c10e6 100644 --- a/modules/text/language_model/bert-base-chinese/module.py +++ b/modules/text/language_model/bert-base-chinese/module.py @@ -47,6 +47,7 @@ class Bert(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Bert, self).__init__() @@ -70,7 +71,7 @@ class Bert(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-base-chinese', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-chinese', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/bert-base-multilingual-cased/README.md b/modules/text/language_model/bert-base-multilingual-cased/README.md index 88b0b79131b7e472abeae1a487ec69e5c783e0c9..3f3510367552876baae27791582348aeccb391be 100644 --- a/modules/text/language_model/bert-base-multilingual-cased/README.md +++ b/modules/text/language_model/bert-base-multilingual-cased/README.md @@ -16,6 +16,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -28,6 +29,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python def predict( diff --git a/modules/text/language_model/bert-base-multilingual-cased/module.py b/modules/text/language_model/bert-base-multilingual-cased/module.py index 2f4fa756d07be36459f77dfe621608fcda509061..f2f72541eacc394a37a0e76d062c35d6e7da2cab 100644 --- a/modules/text/language_model/bert-base-multilingual-cased/module.py +++ b/modules/text/language_model/bert-base-multilingual-cased/module.py @@ -47,6 +47,7 @@ class Bert(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Bert, self).__init__() @@ -70,7 +71,7 @@ class Bert(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-base-multilingual-cased', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained( pretrained_model_name_or_path='bert-base-multilingual-cased', **kwargs) diff --git a/modules/text/language_model/bert-base-multilingual-uncased/README.md b/modules/text/language_model/bert-base-multilingual-uncased/README.md index 3b8b2c13ad7aeabc6f0f66fd18e43d20f3fe0e40..f579ac8612f7a3e63a4e2b4ab8ca9766a16123cc 100644 --- a/modules/text/language_model/bert-base-multilingual-uncased/README.md +++ b/modules/text/language_model/bert-base-multilingual-uncased/README.md @@ -16,6 +16,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -28,6 +29,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python def predict( diff --git a/modules/text/language_model/bert-base-multilingual-uncased/module.py b/modules/text/language_model/bert-base-multilingual-uncased/module.py index 515a6f5fe02abcd34d79ead127eedb27424f6293..091638d73d655f9afa5062fbb76f371df6a4d4cd 100644 --- a/modules/text/language_model/bert-base-multilingual-uncased/module.py +++ b/modules/text/language_model/bert-base-multilingual-uncased/module.py @@ -47,6 +47,7 @@ class Bert(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Bert, self).__init__() @@ -70,7 +71,7 @@ class Bert(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-base-multilingual-uncased', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained( pretrained_model_name_or_path='bert-base-multilingual-uncased', **kwargs) diff --git a/modules/text/language_model/bert-base-uncased/README.md b/modules/text/language_model/bert-base-uncased/README.md index 83033e8181a7e9b9cc088ccc75d6fba3ea826dd9..312f218fd468432284b4f4f0e1592b0803e37efb 100644 --- a/modules/text/language_model/bert-base-uncased/README.md +++ b/modules/text/language_model/bert-base-uncased/README.md @@ -16,6 +16,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -28,7 +29,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/bert-base-uncased/module.py b/modules/text/language_model/bert-base-uncased/module.py index e39147151fec40565e571775a3a339d5bc20dc84..804f5599fa0b8dd11c9f12d50be7545598923dda 100644 --- a/modules/text/language_model/bert-base-uncased/module.py +++ b/modules/text/language_model/bert-base-uncased/module.py @@ -47,6 +47,7 @@ class Bert(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Bert, self).__init__() @@ -70,7 +71,7 @@ class Bert(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-base-uncased', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/bert-large-cased/README.md b/modules/text/language_model/bert-large-cased/README.md index a815de3022ca168780735227fa5109f64dacb5a3..cbbcfe1732459049f8e9a3950db3fcdfd51aeed9 100644 --- a/modules/text/language_model/bert-large-cased/README.md +++ b/modules/text/language_model/bert-large-cased/README.md @@ -16,6 +16,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -28,7 +29,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/bert-large-cased/module.py b/modules/text/language_model/bert-large-cased/module.py index a834167b60ec4e3d88c7466ff4508eae218e119d..9aad41aa28711e760505449ed002f0dd58b79e8f 100644 --- a/modules/text/language_model/bert-large-cased/module.py +++ b/modules/text/language_model/bert-large-cased/module.py @@ -47,6 +47,7 @@ class Bert(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Bert, self).__init__() @@ -70,7 +71,7 @@ class Bert(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-large-cased', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-cased', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/bert-large-uncased/README.md b/modules/text/language_model/bert-large-uncased/README.md index 6de8deeb3cf438620df76307df4793d5f0ea9d13..7c7db315c6607411a4b76920f34f051b94c803eb 100644 --- a/modules/text/language_model/bert-large-uncased/README.md +++ b/modules/text/language_model/bert-large-uncased/README.md @@ -16,6 +16,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -28,7 +29,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/bert-large-uncased/module.py b/modules/text/language_model/bert-large-uncased/module.py index 157fe9aa1b7c54fc76d014457b6693d7a08e245e..848f84401f51248cb419d17dfb40d8ca56c89a38 100644 --- a/modules/text/language_model/bert-large-uncased/module.py +++ b/modules/text/language_model/bert-large-uncased/module.py @@ -47,6 +47,7 @@ class Bert(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Bert, self).__init__() @@ -70,7 +71,7 @@ class Bert(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-large-uncased', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-uncased', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/chinese_bert_wwm/README.md b/modules/text/language_model/chinese_bert_wwm/README.md index 12926937ff1b6f06ebcf5fdc0753146bfb5b183c..549db670a557940004c7d614225a74a23cf7c245 100644 --- a/modules/text/language_model/chinese_bert_wwm/README.md +++ b/modules/text/language_model/chinese_bert_wwm/README.md @@ -14,6 +14,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -26,7 +27,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/chinese_bert_wwm/module.py b/modules/text/language_model/chinese_bert_wwm/module.py index 4490bfc463a886de891c98512c506317e4c80bad..5770cdd1da50529b17c429118a33b616947bec4c 100644 --- a/modules/text/language_model/chinese_bert_wwm/module.py +++ b/modules/text/language_model/chinese_bert_wwm/module.py @@ -47,6 +47,7 @@ class BertWwm(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(BertWwm, self).__init__() @@ -70,7 +71,7 @@ class BertWwm(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-wwm-chinese', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-chinese', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/chinese_bert_wwm_ext/README.md b/modules/text/language_model/chinese_bert_wwm_ext/README.md index 28935b5c08144a897b9a17f86953f510004fb9da..a3ea8c75a41843e7a5b7524aa16ce588b17b2802 100644 --- a/modules/text/language_model/chinese_bert_wwm_ext/README.md +++ b/modules/text/language_model/chinese_bert_wwm_ext/README.md @@ -14,6 +14,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -26,7 +27,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/chinese_bert_wwm_ext/module.py b/modules/text/language_model/chinese_bert_wwm_ext/module.py index 589c9226e1e8eaabdb71ec7ae6871de782684170..2c351b830f0b971c618cfabed93ce751c9afbc04 100644 --- a/modules/text/language_model/chinese_bert_wwm_ext/module.py +++ b/modules/text/language_model/chinese_bert_wwm_ext/module.py @@ -47,6 +47,7 @@ class BertWwm(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(BertWwm, self).__init__() @@ -70,7 +71,7 @@ class BertWwm(nn.Layer): self.model = BertForTokenClassification.from_pretrained( pretrained_model_name_or_path='bert-wwm-ext-chinese', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-ext-chinese', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/chinese_electra_base/README.md b/modules/text/language_model/chinese_electra_base/README.md index 6cf0eaad56057b104d04118d6f0c18b9f7e3b2c2..1778ebe67101c38fc7ee9c659e0a5b93f501f0ca 100644 --- a/modules/text/language_model/chinese_electra_base/README.md +++ b/modules/text/language_model/chinese_electra_base/README.md @@ -15,6 +15,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -27,6 +28,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/chinese_electra_base/module.py b/modules/text/language_model/chinese_electra_base/module.py index b9040911bf2a97f0a8db2a6cb286e7e6c1f04720..52a9a9fd2bfb771466156e7109f29735b4eb6b44 100644 --- a/modules/text/language_model/chinese_electra_base/module.py +++ b/modules/text/language_model/chinese_electra_base/module.py @@ -47,6 +47,7 @@ class Electra(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Electra, self).__init__() @@ -70,7 +71,7 @@ class Electra(nn.Layer): self.model = ElectraForTokenClassification.from_pretrained( pretrained_model_name_or_path='chinese-electra-base', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-base', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/chinese_electra_small/README.md b/modules/text/language_model/chinese_electra_small/README.md index 0a006ae5a9eee0c239ed1c48774b1a07d2e52775..9a94c735e14c4edf41d4b81cd0206384a780efe2 100644 --- a/modules/text/language_model/chinese_electra_small/README.md +++ b/modules/text/language_model/chinese_electra_small/README.md @@ -15,6 +15,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -27,6 +28,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/chinese_electra_small/module.py b/modules/text/language_model/chinese_electra_small/module.py index ad9623250513721d64bcfc272a49dcde840f0be0..9e71856ddfe9335943933882c710a2174aa7e380 100644 --- a/modules/text/language_model/chinese_electra_small/module.py +++ b/modules/text/language_model/chinese_electra_small/module.py @@ -47,6 +47,7 @@ class Electra(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Electra, self).__init__() @@ -70,7 +71,7 @@ class Electra(nn.Layer): self.model = ElectraForTokenClassification.from_pretrained( pretrained_model_name_or_path='chinese-electra-small', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-small', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/electra_base/README.md b/modules/text/language_model/electra_base/README.md index 70fc1b064a1151df00a18f26084ba67cdd49b7da..a5b7567c5574cca62805535bfa82162e009efe62 100644 --- a/modules/text/language_model/electra_base/README.md +++ b/modules/text/language_model/electra_base/README.md @@ -15,6 +15,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -27,6 +28,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/electra_base/module.py b/modules/text/language_model/electra_base/module.py index 9a4a46c9c2b4ecf3c5368b5477f81d159defbc21..9f4c473c76988e9ec68e47978b762142e5e73729 100644 --- a/modules/text/language_model/electra_base/module.py +++ b/modules/text/language_model/electra_base/module.py @@ -46,6 +46,7 @@ class Electra(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Electra, self).__init__() @@ -69,7 +70,7 @@ class Electra(nn.Layer): self.model = ElectraForTokenClassification.from_pretrained( pretrained_model_name_or_path='electra-base', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-base', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/electra_large/README.md b/modules/text/language_model/electra_large/README.md index e7ebae4ddca24212cefc9bc24a880e0fbbf740e8..3996c51587159a1901f8fe5f066ce4ffb7d33a49 100644 --- a/modules/text/language_model/electra_large/README.md +++ b/modules/text/language_model/electra_large/README.md @@ -15,6 +15,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -27,6 +28,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/electra_large/module.py b/modules/text/language_model/electra_large/module.py index cefe8e6ca35c694baf9c3cdc91b41dfb890be6d7..8360f30f77b8a639a601640ea68af5da7dcc07ce 100644 --- a/modules/text/language_model/electra_large/module.py +++ b/modules/text/language_model/electra_large/module.py @@ -46,6 +46,7 @@ class Electra(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Electra, self).__init__() @@ -69,7 +70,7 @@ class Electra(nn.Layer): self.model = ElectraForTokenClassification.from_pretrained( pretrained_model_name_or_path='electra-large', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-large', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/electra_small/README.md b/modules/text/language_model/electra_small/README.md index bdd8fd0c7b575087f6243035f60d244fab435f55..588fc2bbcad03962c2f6d25fa5d962a151f569ce 100644 --- a/modules/text/language_model/electra_small/README.md +++ b/modules/text/language_model/electra_small/README.md @@ -15,6 +15,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -27,6 +28,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/electra_small/module.py b/modules/text/language_model/electra_small/module.py index a4af038809dbbd14d4af921154413edc84a85c7d..b139f0cb14b36b522319dc8d6ee5a63effac2dfd 100644 --- a/modules/text/language_model/electra_small/module.py +++ b/modules/text/language_model/electra_small/module.py @@ -46,6 +46,7 @@ class Electra(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Electra, self).__init__() @@ -69,7 +70,7 @@ class Electra(nn.Layer): self.model = ElectraForTokenClassification.from_pretrained( pretrained_model_name_or_path='electra-small', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-small', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/ernie/README.md b/modules/text/language_model/ernie/README.md index 8daa880833d29433e001861cf35deab5162e6908..d604d429e378149183142ad7f2cf42defbd7d46b 100644 --- a/modules/text/language_model/ernie/README.md +++ b/modules/text/language_model/ernie/README.md @@ -25,6 +25,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -37,6 +38,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/ernie/module.py b/modules/text/language_model/ernie/module.py index f851d3e4d85a5e91cee8a0e90a748e070af122a6..6dfc4a1cfec9364e1ebc37b01b54ea434920c374 100644 --- a/modules/text/language_model/ernie/module.py +++ b/modules/text/language_model/ernie/module.py @@ -47,6 +47,7 @@ class Ernie(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Ernie, self).__init__() @@ -70,7 +71,7 @@ class Ernie(nn.Layer): self.model = ErnieForTokenClassification.from_pretrained( pretrained_model_name_or_path='ernie-1.0', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-1.0', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/ernie_tiny/README.md b/modules/text/language_model/ernie_tiny/README.md index e2f765b280926de180374499ff8edeafc1a939d1..8d06f478b0483e244ea299d46abce7bd5872dcf4 100644 --- a/modules/text/language_model/ernie_tiny/README.md +++ b/modules/text/language_model/ernie_tiny/README.md @@ -25,6 +25,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -37,6 +38,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/ernie_tiny/module.py b/modules/text/language_model/ernie_tiny/module.py index e4c025966beccc4c1958b4b7f80a5d6cd87828ae..051837907913f099cccd409241ff2c8229d2103d 100644 --- a/modules/text/language_model/ernie_tiny/module.py +++ b/modules/text/language_model/ernie_tiny/module.py @@ -46,6 +46,7 @@ class ErnieTiny(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(ErnieTiny, self).__init__() @@ -69,7 +70,7 @@ class ErnieTiny(nn.Layer): self.model = ErnieForTokenClassification.from_pretrained( pretrained_model_name_or_path='ernie-tiny', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-tiny', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/ernie_v2_eng_base/README.md b/modules/text/language_model/ernie_v2_eng_base/README.md index f0c20c6637be179aad454d40dd45cd6def71f88c..aa8b0b1ad904b77d89f3499ced8dff10d358cf15 100644 --- a/modules/text/language_model/ernie_v2_eng_base/README.md +++ b/modules/text/language_model/ernie_v2_eng_base/README.md @@ -21,6 +21,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -33,6 +34,7 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 ```python diff --git a/modules/text/language_model/ernie_v2_eng_base/module.py b/modules/text/language_model/ernie_v2_eng_base/module.py index 03574c77490e1fb6658abd235845bab62356905a..3206ed7d0b42f55cd484cc851f01ac8ac3526bac 100644 --- a/modules/text/language_model/ernie_v2_eng_base/module.py +++ b/modules/text/language_model/ernie_v2_eng_base/module.py @@ -47,6 +47,7 @@ class ErnieV2(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(ErnieV2, self).__init__() @@ -70,7 +71,7 @@ class ErnieV2(nn.Layer): self.model = ErnieForTokenClassification.from_pretrained( pretrained_model_name_or_path='ernie-2.0-en', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/ernie_v2_eng_large/README.md b/modules/text/language_model/ernie_v2_eng_large/README.md index e80442b708a104f8d19b7ee3d5f8078ee1c1db0c..82ef41b1b1093afb9f786f8a0253960e91ac5433 100644 --- a/modules/text/language_model/ernie_v2_eng_large/README.md +++ b/modules/text/language_model/ernie_v2_eng_large/README.md @@ -21,6 +21,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -33,7 +34,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/ernie_v2_eng_large/module.py b/modules/text/language_model/ernie_v2_eng_large/module.py index 57a7779b5da1de20ff7e1e21f7628a656ca85596..8e01735b83468acd97922eeb0a606e9851aabbe2 100644 --- a/modules/text/language_model/ernie_v2_eng_large/module.py +++ b/modules/text/language_model/ernie_v2_eng_large/module.py @@ -47,6 +47,7 @@ class ErnieV2(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(ErnieV2, self).__init__() @@ -70,7 +71,7 @@ class ErnieV2(nn.Layer): self.model = ErnieForTokenClassification.from_pretrained( pretrained_model_name_or_path='ernie-2.0-large-en', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-large-en', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/rbt3/README.md b/modules/text/language_model/rbt3/README.md index ebed472ae05dc50884936b2dae6105ec27f55a55..14245ce79d673297fb2c354c9938c2ff5b08fd51 100644 --- a/modules/text/language_model/rbt3/README.md +++ b/modules/text/language_model/rbt3/README.md @@ -14,6 +14,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -26,7 +27,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/rbt3/module.py b/modules/text/language_model/rbt3/module.py index 7a2edac976f1184f0028c49a6957eeae05b1d29a..1fdde350ae46489b99f3072a816d35f630fee042 100644 --- a/modules/text/language_model/rbt3/module.py +++ b/modules/text/language_model/rbt3/module.py @@ -47,6 +47,7 @@ class Roberta(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Roberta, self).__init__() @@ -70,7 +71,7 @@ class Roberta(nn.Layer): self.model = RobertaForTokenClassification.from_pretrained( pretrained_model_name_or_path='rbt3', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/rbtl3/README.md b/modules/text/language_model/rbtl3/README.md index 3562b1a6aeea4322288345575d22356a250ad06e..f570b5663e231962c87da71ce6dcb557426f892d 100644 --- a/modules/text/language_model/rbtl3/README.md +++ b/modules/text/language_model/rbtl3/README.md @@ -14,6 +14,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -26,7 +27,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/rbtl3/module.py b/modules/text/language_model/rbtl3/module.py index 37c96ebc06010b5f8db90ae017572ed7b54fe500..d5789099dc1c445ab084f477c4f29d59d0716121 100644 --- a/modules/text/language_model/rbtl3/module.py +++ b/modules/text/language_model/rbtl3/module.py @@ -47,6 +47,7 @@ class Roberta(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Roberta, self).__init__() @@ -70,7 +71,7 @@ class Roberta(nn.Layer): self.model = RobertaForTokenClassification.from_pretrained( pretrained_model_name_or_path='rbtl3', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/roberta-wwm-ext-large/README.md b/modules/text/language_model/roberta-wwm-ext-large/README.md index 18a85f4b632aebdaf93e4327a4d817f08bfddbbc..c9f5758ef7a97adc12a3ac06026885f4518a8b59 100644 --- a/modules/text/language_model/roberta-wwm-ext-large/README.md +++ b/modules/text/language_model/roberta-wwm-ext-large/README.md @@ -15,6 +15,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -27,7 +28,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/roberta-wwm-ext-large/module.py b/modules/text/language_model/roberta-wwm-ext-large/module.py index 501ff092cd37086f56001f641782368571daf8b3..13efb6aea4d8f1a8d0fdda5c17cceb4347cbaf14 100644 --- a/modules/text/language_model/roberta-wwm-ext-large/module.py +++ b/modules/text/language_model/roberta-wwm-ext-large/module.py @@ -48,6 +48,7 @@ class Roberta(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Roberta, self).__init__() @@ -71,7 +72,7 @@ class Roberta(nn.Layer): self.model = RobertaForTokenClassification.from_pretrained( pretrained_model_name_or_path='roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs) self.dropout = paddle.nn.Dropout(0.1) diff --git a/modules/text/language_model/roberta-wwm-ext/README.md b/modules/text/language_model/roberta-wwm-ext/README.md index 8e7cb2696db26fd899039ebac5fa234376ee5ed7..d24a646e781e8bfbd643f3aab3c3bfb6c2bad45b 100644 --- a/modules/text/language_model/roberta-wwm-ext/README.md +++ b/modules/text/language_model/roberta-wwm-ext/README.md @@ -15,6 +15,7 @@ def __init__( load_checkpoint=None, label_map=None, num_classes=2, + suffix=False, **kwargs, ) ``` @@ -27,7 +28,9 @@ def __init__( * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `label_map`:预测时的类别映射表。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 +* `suffix`: 序列标注任务的标签格式,如果设定为`True`,标签以'-B', '-I', '-E' 或者 '-S'为结尾,此参数默认为`False`。 * `**kwargs`:用户额外指定的关键字字典类型的参数。 + ```python def predict( data, diff --git a/modules/text/language_model/roberta-wwm-ext/module.py b/modules/text/language_model/roberta-wwm-ext/module.py index ee3b8f0d193f7fce6ab2d5b3309ee5b68c678b0a..66108a239e8d0acc2e3da9b6595e6e15337e6044 100644 --- a/modules/text/language_model/roberta-wwm-ext/module.py +++ b/modules/text/language_model/roberta-wwm-ext/module.py @@ -48,6 +48,7 @@ class Roberta(nn.Layer): load_checkpoint: str = None, label_map: Dict = None, num_classes: int = 2, + suffix: bool = False, **kwargs, ): super(Roberta, self).__init__() @@ -71,7 +72,7 @@ class Roberta(nn.Layer): self.model = RobertaForTokenClassification.from_pretrained( pretrained_model_name_or_path='roberta-wwm-ext', num_classes=self.num_classes, **kwargs) self.criterion = paddle.nn.loss.CrossEntropyLoss() - self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) + self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix) elif task == 'text-matching': self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs) self.dropout = paddle.nn.Dropout(0.1)