未验证 提交 f914eff5 编写于 作者: K KP 提交者: GitHub

Merge pull request #1560 from linjieccc/bug_fix

add suffix for ChunkEvaluator
...@@ -16,6 +16,7 @@ def __init__( ...@@ -16,6 +16,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -28,6 +29,7 @@ def __init__( ...@@ -28,6 +29,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -47,6 +47,7 @@ class Bert(nn.Layer): ...@@ -47,6 +47,7 @@ class Bert(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Bert, self).__init__() super(Bert, self).__init__()
...@@ -70,7 +71,7 @@ class Bert(nn.Layer): ...@@ -70,7 +71,7 @@ class Bert(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-base-cased', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-base-cased', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-cased', **kwargs) self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-cased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -16,6 +16,7 @@ def __init__( ...@@ -16,6 +16,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -28,6 +29,7 @@ def __init__( ...@@ -28,6 +29,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
......
...@@ -47,6 +47,7 @@ class Bert(nn.Layer): ...@@ -47,6 +47,7 @@ class Bert(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Bert, self).__init__() super(Bert, self).__init__()
...@@ -70,7 +71,7 @@ class Bert(nn.Layer): ...@@ -70,7 +71,7 @@ class Bert(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-base-chinese', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-base-chinese', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-chinese', **kwargs) self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-chinese', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -16,6 +16,7 @@ def __init__( ...@@ -16,6 +16,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -28,6 +29,7 @@ def __init__( ...@@ -28,6 +29,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
......
...@@ -47,6 +47,7 @@ class Bert(nn.Layer): ...@@ -47,6 +47,7 @@ class Bert(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Bert, self).__init__() super(Bert, self).__init__()
...@@ -70,7 +71,7 @@ class Bert(nn.Layer): ...@@ -70,7 +71,7 @@ class Bert(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-base-multilingual-cased', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-base-multilingual-cased', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained( self.model = BertModel.from_pretrained(
pretrained_model_name_or_path='bert-base-multilingual-cased', **kwargs) pretrained_model_name_or_path='bert-base-multilingual-cased', **kwargs)
......
...@@ -16,6 +16,7 @@ def __init__( ...@@ -16,6 +16,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -28,6 +29,7 @@ def __init__( ...@@ -28,6 +29,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
......
...@@ -47,6 +47,7 @@ class Bert(nn.Layer): ...@@ -47,6 +47,7 @@ class Bert(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Bert, self).__init__() super(Bert, self).__init__()
...@@ -70,7 +71,7 @@ class Bert(nn.Layer): ...@@ -70,7 +71,7 @@ class Bert(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-base-multilingual-uncased', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-base-multilingual-uncased', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained( self.model = BertModel.from_pretrained(
pretrained_model_name_or_path='bert-base-multilingual-uncased', **kwargs) pretrained_model_name_or_path='bert-base-multilingual-uncased', **kwargs)
......
...@@ -16,6 +16,7 @@ def __init__( ...@@ -16,6 +16,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -28,7 +29,9 @@ def __init__( ...@@ -28,7 +29,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class Bert(nn.Layer): ...@@ -47,6 +47,7 @@ class Bert(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Bert, self).__init__() super(Bert, self).__init__()
...@@ -70,7 +71,7 @@ class Bert(nn.Layer): ...@@ -70,7 +71,7 @@ class Bert(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-base-uncased', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-base-uncased', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', **kwargs) self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -16,6 +16,7 @@ def __init__( ...@@ -16,6 +16,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -28,7 +29,9 @@ def __init__( ...@@ -28,7 +29,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class Bert(nn.Layer): ...@@ -47,6 +47,7 @@ class Bert(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Bert, self).__init__() super(Bert, self).__init__()
...@@ -70,7 +71,7 @@ class Bert(nn.Layer): ...@@ -70,7 +71,7 @@ class Bert(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-large-cased', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-large-cased', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-cased', **kwargs) self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-cased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -16,6 +16,7 @@ def __init__( ...@@ -16,6 +16,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -28,7 +29,9 @@ def __init__( ...@@ -28,7 +29,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class Bert(nn.Layer): ...@@ -47,6 +47,7 @@ class Bert(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Bert, self).__init__() super(Bert, self).__init__()
...@@ -70,7 +71,7 @@ class Bert(nn.Layer): ...@@ -70,7 +71,7 @@ class Bert(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-large-uncased', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-large-uncased', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-uncased', **kwargs) self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-uncased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -14,6 +14,7 @@ def __init__( ...@@ -14,6 +14,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -26,7 +27,9 @@ def __init__( ...@@ -26,7 +27,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class BertWwm(nn.Layer): ...@@ -47,6 +47,7 @@ class BertWwm(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(BertWwm, self).__init__() super(BertWwm, self).__init__()
...@@ -70,7 +71,7 @@ class BertWwm(nn.Layer): ...@@ -70,7 +71,7 @@ class BertWwm(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-wwm-chinese', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-wwm-chinese', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-chinese', **kwargs) self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-chinese', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -14,6 +14,7 @@ def __init__( ...@@ -14,6 +14,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -26,7 +27,9 @@ def __init__( ...@@ -26,7 +27,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class BertWwm(nn.Layer): ...@@ -47,6 +47,7 @@ class BertWwm(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(BertWwm, self).__init__() super(BertWwm, self).__init__()
...@@ -70,7 +71,7 @@ class BertWwm(nn.Layer): ...@@ -70,7 +71,7 @@ class BertWwm(nn.Layer):
self.model = BertForTokenClassification.from_pretrained( self.model = BertForTokenClassification.from_pretrained(
pretrained_model_name_or_path='bert-wwm-ext-chinese', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='bert-wwm-ext-chinese', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-ext-chinese', **kwargs) self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-ext-chinese', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -15,6 +15,7 @@ def __init__( ...@@ -15,6 +15,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -27,6 +28,7 @@ def __init__( ...@@ -27,6 +28,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -47,6 +47,7 @@ class Electra(nn.Layer): ...@@ -47,6 +47,7 @@ class Electra(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Electra, self).__init__() super(Electra, self).__init__()
...@@ -70,7 +71,7 @@ class Electra(nn.Layer): ...@@ -70,7 +71,7 @@ class Electra(nn.Layer):
self.model = ElectraForTokenClassification.from_pretrained( self.model = ElectraForTokenClassification.from_pretrained(
pretrained_model_name_or_path='chinese-electra-base', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='chinese-electra-base', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-base', **kwargs) self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-base', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -15,6 +15,7 @@ def __init__( ...@@ -15,6 +15,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -27,6 +28,7 @@ def __init__( ...@@ -27,6 +28,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -47,6 +47,7 @@ class Electra(nn.Layer): ...@@ -47,6 +47,7 @@ class Electra(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Electra, self).__init__() super(Electra, self).__init__()
...@@ -70,7 +71,7 @@ class Electra(nn.Layer): ...@@ -70,7 +71,7 @@ class Electra(nn.Layer):
self.model = ElectraForTokenClassification.from_pretrained( self.model = ElectraForTokenClassification.from_pretrained(
pretrained_model_name_or_path='chinese-electra-small', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='chinese-electra-small', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-small', **kwargs) self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-small', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -15,6 +15,7 @@ def __init__( ...@@ -15,6 +15,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -27,6 +28,7 @@ def __init__( ...@@ -27,6 +28,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -46,6 +46,7 @@ class Electra(nn.Layer): ...@@ -46,6 +46,7 @@ class Electra(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Electra, self).__init__() super(Electra, self).__init__()
...@@ -69,7 +70,7 @@ class Electra(nn.Layer): ...@@ -69,7 +70,7 @@ class Electra(nn.Layer):
self.model = ElectraForTokenClassification.from_pretrained( self.model = ElectraForTokenClassification.from_pretrained(
pretrained_model_name_or_path='electra-base', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='electra-base', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-base', **kwargs) self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-base', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -15,6 +15,7 @@ def __init__( ...@@ -15,6 +15,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -27,6 +28,7 @@ def __init__( ...@@ -27,6 +28,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -46,6 +46,7 @@ class Electra(nn.Layer): ...@@ -46,6 +46,7 @@ class Electra(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Electra, self).__init__() super(Electra, self).__init__()
...@@ -69,7 +70,7 @@ class Electra(nn.Layer): ...@@ -69,7 +70,7 @@ class Electra(nn.Layer):
self.model = ElectraForTokenClassification.from_pretrained( self.model = ElectraForTokenClassification.from_pretrained(
pretrained_model_name_or_path='electra-large', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='electra-large', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-large', **kwargs) self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-large', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -15,6 +15,7 @@ def __init__( ...@@ -15,6 +15,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -27,6 +28,7 @@ def __init__( ...@@ -27,6 +28,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -46,6 +46,7 @@ class Electra(nn.Layer): ...@@ -46,6 +46,7 @@ class Electra(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Electra, self).__init__() super(Electra, self).__init__()
...@@ -69,7 +70,7 @@ class Electra(nn.Layer): ...@@ -69,7 +70,7 @@ class Electra(nn.Layer):
self.model = ElectraForTokenClassification.from_pretrained( self.model = ElectraForTokenClassification.from_pretrained(
pretrained_model_name_or_path='electra-small', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='electra-small', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-small', **kwargs) self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-small', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -25,6 +25,7 @@ def __init__( ...@@ -25,6 +25,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -37,6 +38,7 @@ def __init__( ...@@ -37,6 +38,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B', '-I', '-E' 或者 '-S' 为结尾;否则标签以 'B-', 'I-', 'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -47,6 +47,7 @@ class Ernie(nn.Layer): ...@@ -47,6 +47,7 @@ class Ernie(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Ernie, self).__init__() super(Ernie, self).__init__()
...@@ -70,7 +71,7 @@ class Ernie(nn.Layer): ...@@ -70,7 +71,7 @@ class Ernie(nn.Layer):
self.model = ErnieForTokenClassification.from_pretrained( self.model = ErnieForTokenClassification.from_pretrained(
pretrained_model_name_or_path='ernie-1.0', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='ernie-1.0', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-1.0', **kwargs) self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-1.0', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -25,6 +25,7 @@ def __init__( ...@@ -25,6 +25,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -37,6 +38,7 @@ def __init__( ...@@ -37,6 +38,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B'、'-I'、'-E' 或者 '-S' 为结尾;否则标签以 'B-'、'I-'、'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -46,6 +46,7 @@ class ErnieTiny(nn.Layer): ...@@ -46,6 +46,7 @@ class ErnieTiny(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(ErnieTiny, self).__init__() super(ErnieTiny, self).__init__()
...@@ -69,7 +70,7 @@ class ErnieTiny(nn.Layer): ...@@ -69,7 +70,7 @@ class ErnieTiny(nn.Layer):
self.model = ErnieForTokenClassification.from_pretrained( self.model = ErnieForTokenClassification.from_pretrained(
pretrained_model_name_or_path='ernie-tiny', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='ernie-tiny', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-tiny', **kwargs) self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-tiny', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -21,6 +21,7 @@ def __init__( ...@@ -21,6 +21,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -33,6 +34,7 @@ def __init__( ...@@ -33,6 +34,7 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B'、'-I'、'-E' 或者 '-S' 为结尾;否则标签以 'B-'、'I-'、'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
......
...@@ -47,6 +47,7 @@ class ErnieV2(nn.Layer): ...@@ -47,6 +47,7 @@ class ErnieV2(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(ErnieV2, self).__init__() super(ErnieV2, self).__init__()
...@@ -70,7 +71,7 @@ class ErnieV2(nn.Layer): ...@@ -70,7 +71,7 @@ class ErnieV2(nn.Layer):
self.model = ErnieForTokenClassification.from_pretrained( self.model = ErnieForTokenClassification.from_pretrained(
pretrained_model_name_or_path='ernie-2.0-en', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='ernie-2.0-en', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs) self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -21,6 +21,7 @@ def __init__( ...@@ -21,6 +21,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -33,7 +34,9 @@ def __init__( ...@@ -33,7 +34,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B'、'-I'、'-E' 或者 '-S' 为结尾;否则标签以 'B-'、'I-'、'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class ErnieV2(nn.Layer): ...@@ -47,6 +47,7 @@ class ErnieV2(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(ErnieV2, self).__init__() super(ErnieV2, self).__init__()
...@@ -70,7 +71,7 @@ class ErnieV2(nn.Layer): ...@@ -70,7 +71,7 @@ class ErnieV2(nn.Layer):
self.model = ErnieForTokenClassification.from_pretrained( self.model = ErnieForTokenClassification.from_pretrained(
pretrained_model_name_or_path='ernie-2.0-large-en', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='ernie-2.0-large-en', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-large-en', **kwargs) self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-large-en', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -14,6 +14,7 @@ def __init__( ...@@ -14,6 +14,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -26,7 +27,9 @@ def __init__( ...@@ -26,7 +27,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B'、'-I'、'-E' 或者 '-S' 为结尾;否则标签以 'B-'、'I-'、'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class Roberta(nn.Layer): ...@@ -47,6 +47,7 @@ class Roberta(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Roberta, self).__init__() super(Roberta, self).__init__()
...@@ -70,7 +71,7 @@ class Roberta(nn.Layer): ...@@ -70,7 +71,7 @@ class Roberta(nn.Layer):
self.model = RobertaForTokenClassification.from_pretrained( self.model = RobertaForTokenClassification.from_pretrained(
pretrained_model_name_or_path='rbt3', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='rbt3', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -14,6 +14,7 @@ def __init__( ...@@ -14,6 +14,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -26,7 +27,9 @@ def __init__( ...@@ -26,7 +27,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B'、'-I'、'-E' 或者 '-S' 为结尾;否则标签以 'B-'、'I-'、'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -47,6 +47,7 @@ class Roberta(nn.Layer): ...@@ -47,6 +47,7 @@ class Roberta(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Roberta, self).__init__() super(Roberta, self).__init__()
...@@ -70,7 +71,7 @@ class Roberta(nn.Layer): ...@@ -70,7 +71,7 @@ class Roberta(nn.Layer):
self.model = RobertaForTokenClassification.from_pretrained( self.model = RobertaForTokenClassification.from_pretrained(
pretrained_model_name_or_path='rbtl3', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='rbtl3', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -15,6 +15,7 @@ def __init__( ...@@ -15,6 +15,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -27,7 +28,9 @@ def __init__( ...@@ -27,7 +28,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B'、'-I'、'-E' 或者 '-S' 为结尾;否则标签以 'B-'、'I-'、'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -48,6 +48,7 @@ class Roberta(nn.Layer): ...@@ -48,6 +48,7 @@ class Roberta(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Roberta, self).__init__() super(Roberta, self).__init__()
...@@ -71,7 +72,7 @@ class Roberta(nn.Layer): ...@@ -71,7 +72,7 @@ class Roberta(nn.Layer):
self.model = RobertaForTokenClassification.from_pretrained( self.model = RobertaForTokenClassification.from_pretrained(
pretrained_model_name_or_path='roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='roberta-wwm-ext-large', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
...@@ -15,6 +15,7 @@ def __init__( ...@@ -15,6 +15,7 @@ def __init__(
load_checkpoint=None, load_checkpoint=None,
label_map=None, label_map=None,
num_classes=2, num_classes=2,
suffix=False,
**kwargs, **kwargs,
) )
``` ```
...@@ -27,7 +28,9 @@ def __init__( ...@@ -27,7 +28,9 @@ def __init__(
* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 * `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。
* `label_map`:预测时的类别映射表。 * `label_map`:预测时的类别映射表。
* `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。 * `num_classes`:分类任务的类别数,如果指定了`label_map`,此参数可不传,默认2分类。
* `suffix`:序列标注任务的标签格式。如果设定为`True`,标签以 '-B'、'-I'、'-E' 或者 '-S' 为结尾;否则标签以 'B-'、'I-'、'E-' 或者 'S-' 为开头。此参数默认为`False`。
* `**kwargs`:用户额外指定的关键字字典类型的参数。 * `**kwargs`:用户额外指定的关键字字典类型的参数。
```python ```python
def predict( def predict(
data, data,
......
...@@ -48,6 +48,7 @@ class Roberta(nn.Layer): ...@@ -48,6 +48,7 @@ class Roberta(nn.Layer):
load_checkpoint: str = None, load_checkpoint: str = None,
label_map: Dict = None, label_map: Dict = None,
num_classes: int = 2, num_classes: int = 2,
suffix: bool = False,
**kwargs, **kwargs,
): ):
super(Roberta, self).__init__() super(Roberta, self).__init__()
...@@ -71,7 +72,7 @@ class Roberta(nn.Layer): ...@@ -71,7 +72,7 @@ class Roberta(nn.Layer):
self.model = RobertaForTokenClassification.from_pretrained( self.model = RobertaForTokenClassification.from_pretrained(
pretrained_model_name_or_path='roberta-wwm-ext', num_classes=self.num_classes, **kwargs) pretrained_model_name_or_path='roberta-wwm-ext', num_classes=self.num_classes, **kwargs)
self.criterion = paddle.nn.loss.CrossEntropyLoss() self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]) self.metric = ChunkEvaluator(label_list=[self.label_map[i] for i in sorted(self.label_map.keys())], suffix=suffix)
elif task == 'text-matching': elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs)
self.dropout = paddle.nn.Dropout(0.1) self.dropout = paddle.nn.Dropout(0.1)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册