Unverified commit 8f13d8ca, authored by K KP and committed by GitHub

Add dataset, module task, and demo of text-matching (#1307)

* Update Transformer modules to support the text-matching task
Parent 21fc5cb5
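For context, a minimal end-to-end sketch of how the new task is meant to be used, mirroring the demo files touched in this diff. The sample sentence pairs, the label_map wording, and the predict() arguments are illustrative assumptions based on the existing seq-cls demos, not something verified by this PR:

import paddlehub as hub

# Hypothetical query/title pairs (LCQMC-style Chinese sentence matching).
data = [
    ['这个表情叫什么', '这个猫的表情叫什么'],
    ['什么是智能手环', '智能手环有什么用'],
]
label_map = {0: 'dissimilar', 1: 'similar'}  # assumed label semantics

model = hub.Module(
    name='ernie_tiny',
    version='2.0.2',
    task='text-matching',
    load_checkpoint='./checkpoint/best_model/model.pdparams',
    label_map=label_map)
# predict() arguments assumed to follow the sequence-classification demos.
results = model.predict(data, max_seq_len=128, batch_size=1, use_gpu=False)
for pair, result in zip(data, results):
    print(pair, '->', result)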
...@@ -25,7 +25,7 @@ if __name__ == '__main__':
model = hub.Module(
name='ernie_tiny',
-version='2.0.1',
+version='2.0.2',
task='text-matching',
load_checkpoint='./checkpoint/best_model/model.pdparams',
label_map=label_map)
...
...@@ -31,7 +31,7 @@ parser.add_argument("--save_interval", type=int, default=2, help="Save checkpoin
args = parser.parse_args()
if __name__ == '__main__':
-model = hub.Module(name='ernie_tiny', version='2.0.1', task='text-matching')
+model = hub.Module(name='ernie_tiny', version='2.0.2', task='text-matching')
tokenizer = model.get_tokenizer()
train_dataset = LCQMC(tokenizer=tokenizer, max_seq_len=args.max_seq_len, mode='train')
...
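The train.py hunk above only shows the model, tokenizer, and training split being built. A hedged sketch of the rest of the fine-tuning setup, assuming the hub.Trainer/AdamW pattern used by other PaddleHub 2.x demos and an LCQMC dataset exposed under paddlehub.datasets (both assumptions, not shown in this diff):

import paddle
import paddlehub as hub
from paddlehub.datasets import LCQMC  # assumed import path for the new dataset

model = hub.Module(name='ernie_tiny', version='2.0.2', task='text-matching')
tokenizer = model.get_tokenizer()
train_dataset = LCQMC(tokenizer=tokenizer, max_seq_len=128, mode='train')
dev_dataset = LCQMC(tokenizer=tokenizer, max_seq_len=128, mode='dev')

# Optimizer and trainer arguments are illustrative defaults, not values from this PR.
optimizer = paddle.optimizer.AdamW(learning_rate=5e-5, parameters=model.parameters())
trainer = hub.Trainer(model, optimizer, checkpoint_dir='./checkpoint', use_gpu=False)
trainer.train(train_dataset, epochs=3, batch_size=32, eval_dataset=dev_dataset, save_interval=2)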
...@@ -164,3 +164,7 @@ paddlehub >= 2.0.0
* 2.0.1
Task names adjusted; added the sequence labeling task `token-cls`
* 2.0.2
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="bert-base-cased",
-version="2.0.1",
+version="2.0.2",
summary=
"bert_cased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -72,6 +72,12 @@ class Bert(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-cased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-cased', **kwargs)
else:
...@@ -85,8 +91,28 @@ class Bert(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -109,6 +135,35 @@ class Bert(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
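The text-matching branch added to each module in this diff is a Siamese-style head: both sentences go through the same encoder, the token embeddings are mean-pooled over non-padding positions, and a linear layer classifies the concatenation [query, title, |query - title|]. A minimal standalone sketch of that head, with random tensors standing in for the encoder outputs and an assumed pad_token_id of 0:

import paddle
import paddle.nn.functional as F

def mean_pool(token_embedding, input_ids, pad_token_id=0):
    # mask is 1.0 at real tokens and 0.0 at padding, shape [batch, seq_len, 1]
    mask = paddle.unsqueeze((input_ids != pad_token_id).astype(token_embedding.dtype), axis=2)
    summed = paddle.sum(token_embedding * mask, axis=1)  # [batch, hidden]
    counts = paddle.sum(mask, axis=1)                    # [batch, 1]
    return summed / counts

hidden_size, num_classes = 768, 2
classifier = paddle.nn.Linear(hidden_size * 3, num_classes)

batch, seq_len = 2, 8
query_ids = paddle.randint(1, 100, [batch, seq_len])  # toy ids, no padding here
title_ids = paddle.randint(1, 100, [batch, seq_len])
query_emb = paddle.randn([batch, seq_len, hidden_size])  # stand-in for encoder output
title_emb = paddle.randn([batch, seq_len, hidden_size])

query_mean = mean_pool(query_emb, query_ids)
title_mean = mean_pool(title_emb, title_ids)
sub = paddle.abs(query_mean - title_mean)
logits = classifier(paddle.concat([query_mean, title_mean, sub], axis=-1))
probs = F.softmax(logits, axis=-1)  # [batch, 2] class probabilities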
...@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
* 2.0.1
Task names adjusted; added the sequence labeling task `token-cls`
* 2.0.2
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="bert-base-chinese",
-version="2.0.1",
+version="2.0.2",
summary=
"bert_chinese_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -80,6 +80,12 @@ class Bert(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-chinese', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-chinese', **kwargs)
else:
...@@ -93,8 +99,28 @@ class Bert(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Bert(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
* 2.0.1
Task names adjusted; added the sequence labeling task `token-cls`
* 2.0.2
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="bert-base-multilingual-cased",
-version="2.0.1",
+version="2.0.2",
summary=
"bert_multi_cased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -80,6 +80,12 @@ class Bert(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-multilingual-cased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-multilingual-cased', **kwargs)
else:
...@@ -93,8 +99,28 @@ class Bert(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Bert(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
* 2.0.1
Task names adjusted; added the sequence labeling task `token-cls`
* 2.0.2
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="bert-base-multilingual-uncased",
-version="2.0.1",
+version="2.0.2",
summary=
"bert_multi_uncased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -80,6 +80,12 @@ class Bert(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-multilingual-uncased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-multilingual-uncased', **kwargs)
else:
...@@ -93,8 +99,28 @@ class Bert(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Bert(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
* 2.0.1
Task names adjusted; added the sequence labeling task `token-cls`
* 2.0.2
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="bert-base-uncased",
-version="2.0.1",
+version="2.0.2",
summary=
"bert_uncased_L-12_H-768_A-12, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -72,6 +72,12 @@ class Bert(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-base-uncased', **kwargs)
else:
...@@ -85,8 +91,28 @@ class Bert(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -109,6 +135,35 @@ class Bert(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
* 2.0.1
Task names adjusted; added the sequence labeling task `token-cls`
* 2.0.2
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="bert-large-cased",
-version="2.0.1",
+version="2.0.2",
summary=
"bert_cased_L-24_H-1024_A-16, 24-layer, 1024-hidden, 16-heads, 340M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -72,6 +72,12 @@ class Bert(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-cased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-cased', **kwargs)
else:
...@@ -85,8 +91,28 @@ class Bert(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -109,6 +135,35 @@ class Bert(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -163,3 +163,7 @@ paddlehub >= 2.0.0
* 2.0.1
Task names adjusted; added the sequence labeling task `token-cls`
* 2.0.2
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="bert-large-uncased",
-version="2.0.1",
+version="2.0.2",
summary=
"bert_uncased_L-24_H-1024_A-16, 24-layer, 1024-hidden, 16-heads, 340M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -72,6 +72,12 @@ class Bert(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-uncased', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-large-uncased', **kwargs)
else:
...@@ -85,8 +91,28 @@ class Bert(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -109,6 +135,35 @@ class Bert(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -156,3 +156,7 @@ paddlehub >= 2.0.0
* 2.0.0
Fully upgraded to dynamic graph with interface changes; task names adjusted; added the sequence labeling task `token-cls`
* 2.0.1
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="chinese-bert-wwm",
-version="2.0.0",
+version="2.0.1",
summary=
"chinese-bert-wwm, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="ymcui",
...@@ -81,6 +81,12 @@ class BertWwm(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-chinese', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-chinese', **kwargs)
else:
...@@ -94,8 +100,28 @@ class BertWwm(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -118,6 +144,35 @@ class BertWwm(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -156,3 +156,7 @@ paddlehub >= 2.0.0
* 2.0.0
Fully upgraded to dynamic graph with interface changes; task names adjusted; added the sequence labeling task `token-cls`
* 2.0.1
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="chinese-bert-wwm-ext",
-version="2.0.0",
+version="2.0.1",
summary=
"chinese-bert-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="ymcui",
...@@ -81,6 +81,12 @@ class BertWwm(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-ext-chinese', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = BertModel.from_pretrained(pretrained_model_name_or_path='bert-wwm-ext-chinese', **kwargs)
else:
...@@ -94,8 +100,28 @@ class BertWwm(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -118,6 +144,35 @@ class BertWwm(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -155,3 +155,7 @@ paddlehub >= 2.0.0
* 2.0.0
Fully upgraded to dynamic graph with interface changes; task names adjusted; added the sequence labeling task `token-cls`
* 2.0.1
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -28,7 +28,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="chinese-electra-base",
-version="2.0.0",
+version="2.0.1",
summary=
"chinese-electra-base, 12-layer, 768-hidden, 12-heads, 102M parameters. The module is executed as paddle.dygraph.",
author="ymcui",
...@@ -80,6 +80,12 @@ class Electra(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-base', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-base', **kwargs)
else:
...@@ -93,8 +99,28 @@ class Electra(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Electra(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(query_token_embedding.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(title_token_embedding.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
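One detail that differs in the ELECTRA-based modules: their new text-matching branch takes the encoder result as the sequence output directly and casts the padding mask to that embedding's own dtype, whereas the BERT-style branches unpack a (sequence_output, pooled_output) tuple and reuse the pooler weight dtype. A small illustrative helper (not part of this PR) that copes with both return conventions:

import paddle

def unpack_backbone_output(result):
    # BERT-style backbones: (sequence_output, pooled_output); ELECTRA-style: sequence_output only.
    if isinstance(result, (tuple, list)):
        sequence_output, _ = result
    else:
        sequence_output = result
    return sequence_output

seq = paddle.randn([2, 8, 16])
pooled = paddle.randn([2, 16])
assert unpack_backbone_output((seq, pooled)).shape == seq.shape  # BERT-style
assert unpack_backbone_output(seq).shape == seq.shape            # ELECTRA-style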
...@@ -155,3 +155,7 @@ paddlehub >= 2.0.0
* 2.0.0
Fully upgraded to dynamic graph with interface changes; task names adjusted; added the sequence labeling task `token-cls`
* 2.0.1
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -28,7 +28,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="chinese-electra-small",
-version="2.0.0",
+version="2.0.1",
summary=
"chinese-electra-small, 12-layer, 256-hidden, 4-heads, 12M parameters. The module is executed as paddle.dygraph.",
author="ymcui",
...@@ -80,6 +80,12 @@ class Electra(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-small', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='chinese-electra-small', **kwargs)
else:
...@@ -93,8 +99,28 @@ class Electra(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls':
logits = result
probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Electra(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs
elif self.task == 'text-matching':
query_token_embedding = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(query_token_embedding.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(title_token_embedding.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else:
sequence_output, pooled_output = result
return sequence_output, pooled_output
...
...@@ -151,3 +151,7 @@ paddlehub >= 2.0.0
* 1.0.0
Initial release: dynamic graph model supporting fine-tuning for the text classification task `seq-cls` and the sequence labeling task `token-cls`
* 1.0.1
Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -28,7 +28,7 @@ from paddlehub.utils.log import logger
@moduleinfo(
name="electra-base",
-version="1.0.0",
+version="1.0.1",
summary=
"electra-base, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle",
...@@ -80,6 +80,12 @@ class Electra(nn.Layer):
self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
)
elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-base', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None:
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-base', **kwargs)
else:
...@@ -93,8 +99,28 @@ class Electra(nn.Layer):
self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
-def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None):
+def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Electra(nn.Layer): ...@@ -117,6 +143,35 @@ class Electra(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(query_token_embedding.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(title_token_embedding.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
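The labeled branch is identical across these modules: cross-entropy is computed on the raw logits (CrossEntropyLoss applies softmax internally), while the softmaxed probabilities feed a running accuracy metric. A small hedged sketch of that wiring, assuming logits and integer labels are already at hand:

import paddle
import paddle.nn.functional as F

criterion = paddle.nn.loss.CrossEntropyLoss()
metric = paddle.metric.Accuracy()

def training_step(logits, labels):
    probs = F.softmax(logits, axis=-1)
    loss = criterion(logits, labels)          # softmax is applied inside the loss
    correct = metric.compute(probs, labels)   # per-sample 0/1 correctness
    acc = metric.update(correct)              # running accuracy over the batches seen so far
    return probs, loss, {'acc': acc}

# toy call with a batch of 2 pairs
logits = paddle.to_tensor([[0.3, 1.2], [2.0, -0.5]])
labels = paddle.to_tensor([[1], [0]])
probs, loss, state = training_step(logits, labels)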
...@@ -151,3 +151,7 @@ paddlehub >= 2.0.0 ...@@ -151,3 +151,7 @@ paddlehub >= 2.0.0
* 1.0.0
  Initial release: dynamic-graph model supporting fine-tuning for the text classification (`seq-cls`) and sequence labeling (`token-cls`) tasks
* 1.0.1
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -28,7 +28,7 @@ from paddlehub.utils.log import logger ...@@ -28,7 +28,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="electra-large", name="electra-large",
version="1.0.0", version="1.0.1",
summary= summary=
"electra-large, 24-layer, 1024-hidden, 16-heads, 335M parameters. The module is executed as paddle.dygraph.", "electra-large, 24-layer, 1024-hidden, 16-heads, 335M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle", author="paddlepaddle",
...@@ -80,6 +80,12 @@ class Electra(nn.Layer): ...@@ -80,6 +80,12 @@ class Electra(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-large', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-large', **kwargs) self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-large', **kwargs)
else: else:
...@@ -93,8 +99,28 @@ class Electra(nn.Layer): ...@@ -93,8 +99,28 @@ class Electra(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Electra(nn.Layer): ...@@ -117,6 +143,35 @@ class Electra(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(query_token_embedding.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(title_token_embedding.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
...@@ -151,3 +151,7 @@ paddlehub >= 2.0.0 ...@@ -151,3 +151,7 @@ paddlehub >= 2.0.0
* 1.0.0
  Initial release: dynamic-graph model supporting fine-tuning for the text classification (`seq-cls`) and sequence labeling (`token-cls`) tasks
* 1.0.1
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -28,7 +28,7 @@ from paddlehub.utils.log import logger ...@@ -28,7 +28,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="electra-small", name="electra-small",
version="1.0.0", version="1.0.1",
summary= summary=
"electra-small, 12-layer, 256-hidden, 4-heads, 14M parameters. The module is executed as paddle.dygraph.", "electra-small, 12-layer, 256-hidden, 4-heads, 14M parameters. The module is executed as paddle.dygraph.",
author="paddlepaddle", author="paddlepaddle",
...@@ -80,6 +80,12 @@ class Electra(nn.Layer): ...@@ -80,6 +80,12 @@ class Electra(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-small', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-small', **kwargs) self.model = ElectraModel.from_pretrained(pretrained_model_name_or_path='electra-small', **kwargs)
else: else:
...@@ -93,8 +99,28 @@ class Electra(nn.Layer): ...@@ -93,8 +99,28 @@ class Electra(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Electra(nn.Layer): ...@@ -117,6 +143,35 @@ class Electra(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(query_token_embedding.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(title_token_embedding.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
...@@ -184,3 +184,7 @@ paddlehub >= 2.0.0 ...@@ -184,3 +184,7 @@ paddlehub >= 2.0.0
* 2.0.1
  Adjusted task names; added the sequence labeling task `token-cls`
* 2.0.2
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger ...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="ernie", name="ernie",
version="2.0.1", version="2.0.2",
summary= summary=
"Baidu's ERNIE, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when predtrained. The module is executed as paddle.dygraph.", "Baidu's ERNIE, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when predtrained. The module is executed as paddle.dygraph.",
author="paddlepaddle", author="paddlepaddle",
...@@ -72,6 +72,12 @@ class Ernie(nn.Layer): ...@@ -72,6 +72,12 @@ class Ernie(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-1.0', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-1.0', **kwargs) self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-1.0', **kwargs)
else: else:
...@@ -85,8 +91,28 @@ class Ernie(nn.Layer): ...@@ -85,8 +91,28 @@ class Ernie(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -109,6 +135,35 @@ class Ernie(nn.Layer): ...@@ -109,6 +135,35 @@ class Ernie(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
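Because the projected feature is the concatenation of u, v, and |u - v|, the classifier's input width is always three times the encoder's hidden size. Assuming a 768-hidden base encoder such as ernie-1.0, the layer added here is effectively a 2304-to-2 linear map; a tiny sketch of that arithmetic (the 768 is an assumption, the modules read it from self.model.config['hidden_size']):

import paddle

hidden_size = 768  # assumed base-model width
classifier = paddle.nn.Linear(hidden_size * 3, 2)
print(classifier.weight.shape)  # [2304, 2]; paddle stores Linear weights as [in, out]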
...@@ -176,3 +176,7 @@ paddlehub >= 2.0.0 ...@@ -176,3 +176,7 @@ paddlehub >= 2.0.0
* 2.0.1
  Adjusted task names; added the sequence labeling task `token-cls`
* 2.0.2
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -172,3 +172,7 @@ paddlehub >= 2.0.0 ...@@ -172,3 +172,7 @@ paddlehub >= 2.0.0
* 2.0.1
  Adjusted task names; added the sequence labeling task `token-cls`
* 2.0.2
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger ...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="ernie_v2_eng_base", name="ernie_v2_eng_base",
version="2.0.1", version="2.0.2",
summary= summary=
"Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when predtrained. The module is executed as paddle.dygraph.", "Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when predtrained. The module is executed as paddle.dygraph.",
author="paddlepaddle", author="paddlepaddle",
...@@ -80,6 +80,12 @@ class ErnieV2(nn.Layer): ...@@ -80,6 +80,12 @@ class ErnieV2(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs) self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-en', **kwargs)
else: else:
...@@ -93,8 +99,28 @@ class ErnieV2(nn.Layer): ...@@ -93,8 +99,28 @@ class ErnieV2(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class ErnieV2(nn.Layer): ...@@ -117,6 +143,35 @@ class ErnieV2(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
...@@ -171,3 +171,7 @@ paddlehub >= 2.0.0 ...@@ -171,3 +171,7 @@ paddlehub >= 2.0.0
* 2.0.1
  Adjusted task names; added the sequence labeling task `token-cls`
* 2.0.2
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger ...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="ernie_v2_eng_large", name="ernie_v2_eng_large",
version="2.0.1", version="2.0.2",
summary= summary=
"Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when predtrained. The module is executed as paddle.dygraph.", "Baidu's ERNIE 2.0, Enhanced Representation through kNowledge IntEgration, max_seq_len=512 when predtrained. The module is executed as paddle.dygraph.",
author="paddlepaddle", author="paddlepaddle",
...@@ -80,6 +80,12 @@ class ErnieV2(nn.Layer): ...@@ -80,6 +80,12 @@ class ErnieV2(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-large-en', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-large-en', **kwargs) self.model = ErnieModel.from_pretrained(pretrained_model_name_or_path='ernie-2.0-large-en', **kwargs)
else: else:
...@@ -93,8 +99,28 @@ class ErnieV2(nn.Layer): ...@@ -93,8 +99,28 @@ class ErnieV2(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class ErnieV2(nn.Layer): ...@@ -117,6 +143,35 @@ class ErnieV2(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
...@@ -156,3 +156,7 @@ paddlehub >= 2.0.0 ...@@ -156,3 +156,7 @@ paddlehub >= 2.0.0
* 2.0.0
  Fully upgraded to the dynamic graph with interface changes; adjusted task names and added the sequence labeling task `token-cls`
* 2.0.1
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger ...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="rbt3", name="rbt3",
version="2.0.0", version="2.0.1",
summary="rbt3, 3-layer, 768-hidden, 12-heads, 38M parameters ", summary="rbt3, 3-layer, 768-hidden, 12-heads, 38M parameters ",
author="ymcui", author="ymcui",
author_email="ymcui@ir.hit.edu.cn", author_email="ymcui@ir.hit.edu.cn",
...@@ -80,6 +80,12 @@ class Roberta(nn.Layer): ...@@ -80,6 +80,12 @@ class Roberta(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbt3', **kwargs)
else: else:
...@@ -93,8 +99,28 @@ class Roberta(nn.Layer): ...@@ -93,8 +99,28 @@ class Roberta(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Roberta(nn.Layer): ...@@ -117,6 +143,35 @@ class Roberta(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
...@@ -156,3 +156,7 @@ paddlehub >= 2.0.0 ...@@ -156,3 +156,7 @@ paddlehub >= 2.0.0
* 2.0.0
  Fully upgraded to the dynamic graph with interface changes; adjusted task names and added the sequence labeling task `token-cls`
* 2.0.1
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger ...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="rbtl3", name="rbtl3",
version="2.0.0", version="2.0.1",
summary="rbtl3, 3-layer, 1024-hidden, 16-heads, 61M parameters ", summary="rbtl3, 3-layer, 1024-hidden, 16-heads, 61M parameters ",
author="ymcui", author="ymcui",
author_email="ymcui@ir.hit.edu.cn", author_email="ymcui@ir.hit.edu.cn",
...@@ -80,6 +80,12 @@ class Roberta(nn.Layer): ...@@ -80,6 +80,12 @@ class Roberta(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='rbtl3', **kwargs)
else: else:
...@@ -93,8 +99,28 @@ class Roberta(nn.Layer): ...@@ -93,8 +99,28 @@ class Roberta(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -117,6 +143,35 @@ class Roberta(nn.Layer): ...@@ -117,6 +143,35 @@ class Roberta(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
...@@ -158,3 +158,7 @@ paddlehub >= 2.0.0 ...@@ -158,3 +158,7 @@ paddlehub >= 2.0.0
* 2.0.1
  Adjusted task names; added the sequence labeling task `token-cls`
* 2.0.2
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger ...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="roberta-wwm-ext-large", name="roberta-wwm-ext-large",
version="2.0.1", version="2.0.2",
summary= summary=
"chinese-roberta-wwm-ext-large, 24-layer, 1024-hidden, 16-heads, 340M parameters. The module is executed as paddle.dygraph.", "chinese-roberta-wwm-ext-large, 24-layer, 1024-hidden, 16-heads, 340M parameters. The module is executed as paddle.dygraph.",
author="ymcui", author="ymcui",
...@@ -81,6 +81,12 @@ class Roberta(nn.Layer): ...@@ -81,6 +81,12 @@ class Roberta(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext-large', **kwargs)
else: else:
...@@ -94,8 +100,28 @@ class Roberta(nn.Layer): ...@@ -94,8 +100,28 @@ class Roberta(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -118,6 +144,35 @@ class Roberta(nn.Layer): ...@@ -118,6 +144,35 @@ class Roberta(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
...@@ -158,3 +158,7 @@ paddlehub >= 2.0.0 ...@@ -158,3 +158,7 @@ paddlehub >= 2.0.0
* 2.0.1
  Adjusted task names; added the sequence labeling task `token-cls`
* 2.0.2
  Added the text-matching task `text-matching`
\ No newline at end of file
...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger ...@@ -29,7 +29,7 @@ from paddlehub.utils.log import logger
@moduleinfo( @moduleinfo(
name="roberta-wwm-ext", name="roberta-wwm-ext",
version="2.0.1", version="2.0.2",
summary= summary=
"chinese-roberta-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.", "chinese-roberta-wwm-ext, 12-layer, 768-hidden, 12-heads, 110M parameters. The module is executed as paddle.dygraph.",
author="ymcui", author="ymcui",
...@@ -81,6 +81,12 @@ class Roberta(nn.Layer): ...@@ -81,6 +81,12 @@ class Roberta(nn.Layer):
self.metric = ChunkEvaluator( self.metric = ChunkEvaluator(
label_list=[self.label_map[i] for i in sorted(self.label_map.keys())] label_list=[self.label_map[i] for i in sorted(self.label_map.keys())]
) )
elif task == 'text-matching':
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs)
self.dropout = paddle.nn.Dropout(0.1)
self.classifier = paddle.nn.Linear(self.model.config['hidden_size']*3, 2)
self.criterion = paddle.nn.loss.CrossEntropyLoss()
self.metric = paddle.metric.Accuracy()
elif task is None: elif task is None:
self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs) self.model = RobertaModel.from_pretrained(pretrained_model_name_or_path='roberta-wwm-ext', **kwargs)
else: else:
...@@ -94,8 +100,28 @@ class Roberta(nn.Layer): ...@@ -94,8 +100,28 @@ class Roberta(nn.Layer):
self.set_state_dict(state_dict) self.set_state_dict(state_dict)
logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint)) logger.info('Loaded parameters from %s' % os.path.abspath(load_checkpoint))
def forward(self, input_ids, token_type_ids=None, position_ids=None, attention_mask=None, seq_lengths=None, labels=None): def forward(self,
input_ids=None,
token_type_ids=None,
position_ids=None,
attention_mask=None,
query_input_ids=None,
query_token_type_ids=None,
query_position_ids=None,
query_attention_mask=None,
title_input_ids=None,
title_token_type_ids=None,
title_position_ids=None,
title_attention_mask=None,
seq_lengths=None,
labels=None):
if self.task != 'text-matching':
result = self.model(input_ids, token_type_ids, position_ids, attention_mask) result = self.model(input_ids, token_type_ids, position_ids, attention_mask)
else:
query_result = self.model(query_input_ids, query_token_type_ids, query_position_ids, query_attention_mask)
title_result = self.model(title_input_ids, title_token_type_ids, title_position_ids, title_attention_mask)
if self.task == 'seq-cls': if self.task == 'seq-cls':
logits = result logits = result
probs = F.softmax(logits, axis=1) probs = F.softmax(logits, axis=1)
...@@ -118,6 +144,35 @@ class Roberta(nn.Layer): ...@@ -118,6 +144,35 @@ class Roberta(nn.Layer):
_, _, f1_score = map(float, self.metric.accumulate()) _, _, f1_score = map(float, self.metric.accumulate())
return token_level_probs, loss, {'f1_score': f1_score} return token_level_probs, loss, {'f1_score': f1_score}
return token_level_probs return token_level_probs
elif self.task == 'text-matching':
query_token_embedding, _ = query_result
query_token_embedding = self.dropout(query_token_embedding)
query_attention_mask = paddle.unsqueeze(
(query_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
query_token_embedding = query_token_embedding * query_attention_mask
query_sum_embedding = paddle.sum(query_token_embedding, axis=1)
query_sum_mask = paddle.sum(query_attention_mask, axis=1)
query_mean = query_sum_embedding / query_sum_mask
title_token_embedding, _ = title_result
title_token_embedding = self.dropout(title_token_embedding)
title_attention_mask = paddle.unsqueeze(
(title_input_ids != self.model.pad_token_id).astype(self.model.pooler.dense.weight.dtype), axis=2)
title_token_embedding = title_token_embedding * title_attention_mask
title_sum_embedding = paddle.sum(title_token_embedding, axis=1)
title_sum_mask = paddle.sum(title_attention_mask, axis=1)
title_mean = title_sum_embedding / title_sum_mask
sub = paddle.abs(paddle.subtract(query_mean, title_mean))
projection = paddle.concat([query_mean, title_mean, sub], axis=-1)
logits = self.classifier(projection)
probs = F.softmax(logits)
if labels is not None:
loss = self.criterion(logits, labels)
correct = self.metric.compute(probs, labels)
acc = self.metric.update(correct)
return probs, loss, {'acc': acc}
return probs
else: else:
sequence_output, pooled_output = result sequence_output, pooled_output = result
return sequence_output, pooled_output return sequence_output, pooled_output
......
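When `labels` is not passed, every variant above returns only the softmax probabilities for the pair. A hedged sketch of turning that output into a match decision; the 0 = dissimilar / 1 = similar ordering is an assumption about the binary classifier, not something stated in the diff:

import paddle

probs = paddle.to_tensor([[0.19, 0.81]])   # example output for one query/title pair
pred = int(paddle.argmax(probs, axis=-1))  # assumed mapping: 1 -> similar, 0 -> dissimilar
print(pred)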