Unverified commit 15e8e205, authored by KP, committed by GitHub

Add predict_method for predict serving

Parent 29624f88
@@ -21,7 +21,7 @@
 import io
 import json
 import os
 import six
-from typing import List
+from typing import List, Tuple
 import paddle
 import paddle.nn as nn
@@ -347,40 +347,101 @@ class PretrainedModel(nn.Layer):
         paddle.save(self.state_dict(), file_name)
 
 
-class EmbeddingServing(object):
+class TextServing(object):
+    """
+    A base class for text model which supports serving.
+    """
     @serving
-    def get_embedding(self, texts, use_gpu=False):
-        if self.task is not None:
-            raise RuntimeError("The get_embedding method is only valid when task is None, but got task %s" % self.task)
-
-        paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu')
+    def predict_method(
+            self,
+            data: List[List[str]],
+            max_seq_len: int = 128,
+            batch_size: int = 1,
+            use_gpu: bool = False
+    ):
+        """
+        Run predict method as a service.
+        Serving as a task which is specified from serving config.
+        Tasks supported:
+        1. seq-cls: sequence classification;
+        2. token-cls: sequence labeling;
+        3. None: embedding.
+
+        Args:
+            data (obj:`List(List(str))`): The processed data whose each element is the list of a single text or a pair of texts.
+            max_seq_len (:obj:`int`, `optional`, defaults to 128):
+                If set to a number, will limit the total sequence returned so that it has a maximum length.
+            batch_size(obj:`int`, defaults to 1): The number of batch.
+            use_gpu(obj:`bool`, defaults to `False`): Whether to use gpu to run or not.
+
+        Returns:
+            results(obj:`list`): All the predictions labels.
+        """
+        if self.task in self._tasks_supported:    # cls service
+            if self.label_map:
+                # compatible with json decoding label_map
+                self.label_map = {int(k): v for k, v in self.label_map.items()}
+            results = self.predict(data, max_seq_len, batch_size, use_gpu)
+            if self.task == 'token-cls':
+                # remove labels of [CLS] token and pad tokens
+                results = [
+                    token_labels[1:len(data[i][0]) + 1] for i, token_labels in enumerate(results)
+                ]
+            return results
+        elif self.task is None:    # embedding service
+            token_results, sentence_results = self.get_embedding(data, max_seq_len, batch_size, use_gpu)
+            token_results = [
+                token_embeddings[1:len(data[i][0]) + 1] for i, token_embeddings in enumerate(token_results)
+            ]
+            return token_results, sentence_results
+        else:    # unknown service
+            logger.error(
+                f'Unknown task {self.task}, current tasks supported:\n'
+                '1. seq-cls: sequence classification service;\n'
+                '2. token-cls: sequence labeling service;\n'
+                '3. None: embedding service'
+            )
+        return
+
+
+class TransformerModule(RunModule, TextServing):
+    """
+    The base class for Transformer models.
+    """
+    _tasks_supported = [
+        'seq-cls',
+        'token-cls',
+    ]
+
+    def _batchify(self, data: List[List[str]], max_seq_len: int, batch_size: int):
+        def _parse_batch(batch):
+            input_ids = [entry[0] for entry in batch]
+            segment_ids = [entry[1] for entry in batch]
+            return input_ids, segment_ids
 
         tokenizer = self.get_tokenizer()
-        results = []
-        for text in texts:
+
+        examples = []
+        for text in data:
             if len(text) == 1:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=None, pad_to_max_seq_len=False)
+                encoded_inputs = tokenizer.encode(text[0], text_pair=None, max_seq_len=max_seq_len)
             elif len(text) == 2:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=text[1], pad_to_max_seq_len=False)
+                encoded_inputs = tokenizer.encode(text[0], text_pair=text[1], max_seq_len=max_seq_len)
             else:
                 raise RuntimeError(
                     'The input text must have one or two sequence, but got %d. Please check your inputs.' % len(text))
+            examples.append((encoded_inputs['input_ids'], encoded_inputs['segment_ids']))
 
-            input_ids = paddle.to_tensor(encoded_inputs['input_ids']).unsqueeze(0)
-            segment_ids = paddle.to_tensor(encoded_inputs['segment_ids']).unsqueeze(0)
-            sequence_output, pooled_output = self(input_ids, segment_ids)
-
-            sequence_output = sequence_output.squeeze(0)
-            pooled_output = pooled_output.squeeze(0)
-            results.append((sequence_output.numpy().tolist(), pooled_output.numpy().tolist()))
-        return results
-
-
-class TransformerModule(RunModule, EmbeddingServing):
-    _tasks_supported = [
-        'seq-cls',
-        'token-cls',
-    ]
+        # Seperates data into some batches.
+        one_batch = []
+        for example in examples:
+            one_batch.append(example)
+            if len(one_batch) == batch_size:
+                yield _parse_batch(one_batch)
+                one_batch = []
+        if one_batch:
+            # The last batch whose size is less than the config batch_size setting.
+            yield _parse_batch(one_batch)
 
     def training_step(self, batch: List[paddle.Tensor], batch_idx: int):
         """
@@ -408,12 +469,41 @@ class TransformerModule(RunModule, EmbeddingServing):
         predictions, avg_loss, acc = self(input_ids=batch[0], token_type_ids=batch[1], labels=batch[2])
         return {'metrics': {'acc': acc}}
 
-    def predict(self, data, max_seq_len=128, batch_size=1, use_gpu=False):
+    def get_embedding(self, data: List[List[str]], max_seq_len=128, batch_size=1, use_gpu=False):
+        """
+        Get token level embeddings and sentence level embeddings from model.
+
+        Args:
+            data (obj:`List(List(str))`): The processed data whose each element is the list of a single text or a pair of texts.
+            max_seq_len (:obj:`int`, `optional`, defaults to :int:`None`):
+                If set to a number, will limit the total sequence returned so that it has a maximum length.
+            batch_size(obj:`int`, defaults to 1): The number of batch.
+            use_gpu(obj:`bool`, defaults to `False`): Whether to use gpu to run or not.
+
+        Returns:
+            results(obj:`list`): All the tokens and sentences embeddings.
+        """
+        if self.task is not None:
+            raise RuntimeError("The get_embedding method is only valid when task is None, but got task %s" % self.task)
+
+        return self.predict(
+            data=data,
+            max_seq_len=max_seq_len,
+            batch_size=batch_size,
+            use_gpu=use_gpu
+        )
+
+    def predict(
+            self,
+            data: List[List[str]],
+            max_seq_len: int = 128,
+            batch_size: int = 1,
+            use_gpu: bool = False
+    ):
         """
         Predicts the data labels.
 
         Args:
-            data (obj:`List(str)`): The processed data whose each element is the raw text.
+            data (obj:`List(List(str))`): The processed data whose each element is the list of a single text or a pair of texts.
             max_seq_len (:obj:`int`, `optional`, defaults to :int:`None`):
                 If set to a number, will limit the total sequence returned so that it has a maximum length.
             batch_size(obj:`int`, defaults to 1): The number of batch.
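
The get_embedding method added above is now a thin wrapper that checks task is None and delegates to predict. A local-usage sketch follows; the module name and constructor arguments follow typical PaddleHub 2.x usage and are illustrative rather than taken from this diff.

import paddlehub as hub

# Hypothetical module; any TransformerModule-based model loaded with task=None behaves the same way.
model = hub.Module(name='ernie_tiny', task=None)
data = [
    ['今天天气真好'],                    # a single text
    ['这家餐厅怎么样', '服务还不错'],    # a pair of texts
]
# Per the embedding branch added to predict() later in this diff, the call returns a pair of lists:
# results[0] holds token-level embeddings and results[1] sentence-level embeddings,
# each aligned with the entries in `data`.
results = model.get_embedding(data=data, max_seq_len=128, batch_size=2, use_gpu=False)
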
@@ -422,45 +512,22 @@ class TransformerModule(RunModule, EmbeddingServing):
         Returns:
             results(obj:`list`): All the predictions labels.
         """
-        if self.task not in self._tasks_supported:
-            raise RuntimeError("The predict method supports task in {}, but got task {}.".format(
-                self._tasks_supported, self.task))
+        if self.task not in self._tasks_supported \
+                and self.task is not None:    # None for getting embedding
+            raise RuntimeError(
+                f'Unknown task {self.task}, current tasks supported:\n'
+                '1. seq-cls: sequence classification;\n'
+                '2. token-cls: sequence labeling;\n'
+                '3. None: embedding'
+            )
 
         paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu')
 
-        tokenizer = self.get_tokenizer()
-        examples = []
-        for text in data:
-            if len(text) == 1:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=None, max_seq_len=max_seq_len)
-            elif len(text) == 2:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=text[1], max_seq_len=max_seq_len)
-            else:
-                raise RuntimeError(
-                    'The input text must have one or two sequence, but got %d. Please check your inputs.' % len(text))
-            examples.append((encoded_inputs['input_ids'], encoded_inputs['segment_ids']))
-
-        def _batchify_fn(batch):
-            input_ids = [entry[0] for entry in batch]
-            segment_ids = [entry[1] for entry in batch]
-            return input_ids, segment_ids
-
-        # Seperates data into some batches.
-        batches = []
-        one_batch = []
-        for example in examples:
-            one_batch.append(example)
-            if len(one_batch) == batch_size:
-                batches.append(one_batch)
-                one_batch = []
-        if one_batch:
-            # The last batch whose size is less than the config batch_size setting.
-            batches.append(one_batch)
-
+        batches = self._batchify(data, max_seq_len, batch_size)
         results = []
         self.eval()
         for batch in batches:
-            input_ids, segment_ids = _batchify_fn(batch)
+            input_ids, segment_ids = batch
            input_ids = paddle.to_tensor(input_ids)
            segment_ids = paddle.to_tensor(segment_ids)
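
The refactored predict above now dispatches on self.task: classification and sequence-labeling tasks return labels, while task=None falls through to the embedding branch added in the next hunk. A local-usage sketch for the classification path follows; the module name, checkpoint path, and label_map are illustrative placeholders in the style of the PaddleHub 2.x text-classification demos, not values from this diff.

import paddlehub as hub

# Illustrative fine-tuned sequence classification module and label map.
label_map = {0: 'negative', 1: 'positive'}
model = hub.Module(
    name='ernie_tiny',
    task='seq-cls',
    load_checkpoint='./ckpt/best_model/model.pdparams',
    label_map=label_map,
)
data = [['这家餐厅的菜品很一般'], ['服务态度非常好']]
# Returns one predicted label per input text.
results = model.predict(data, max_seq_len=128, batch_size=1, use_gpu=False)
print(results)
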
@@ -476,5 +543,11 @@ class TransformerModule(RunModule, EmbeddingServing):
                 batch_ids = batch_ids.tolist()
                 token_labels = [[self.label_map[i] for i in token_ids] for token_ids in batch_ids]
                 results.extend(token_labels)
+            elif self.task == None:
+                if not results:
+                    results = [[], []]
+                sequence_output, pooled_output = self(input_ids, segment_ids)
+                results[0].extend(sequence_output.numpy().tolist())    # token-level embedding
+                results[1].extend(pooled_output.numpy().tolist())    # sentence-level embedding
 
         return results
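
Because predict_method is decorated with @serving, it becomes the entry point invoked by PaddleHub Serving once the module is deployed (for example via `hub serving start -m ernie_tiny`). The request sketch below assumes PaddleHub Serving's default port 8866, its /predict/<module_name> route, and that the JSON body is forwarded as keyword arguments to the serving method; these details follow PaddleHub's serving conventions, the module name is illustrative, and none of it is part of this diff.

import json

import requests

# Assumed default PaddleHub Serving address plus an illustrative module name.
url = 'http://127.0.0.1:8866/predict/ernie_tiny'
payload = {
    'data': [['今天天气真好'], ['这家餐厅怎么样', '服务还不错']],
    'max_seq_len': 128,
    'batch_size': 1,
}
headers = {'Content-Type': 'application/json'}
response = requests.post(url=url, headers=headers, data=json.dumps(payload))
# For a seq-cls/token-cls module the response carries predicted labels;
# for task=None it carries token-level and sentence-level embeddings.
print(response.json())
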