Unverified commit 15e8e205, authored by KP, committed by GitHub

Add predict_method for predict serving

Parent 29624f88
@@ -21,7 +21,7 @@
 import io
 import json
 import os
 import six
-from typing import List
+from typing import List, Tuple
 import paddle
 import paddle.nn as nn
@@ -347,40 +347,101 @@ class PretrainedModel(nn.Layer):
         paddle.save(self.state_dict(), file_name)
 
 
-class EmbeddingServing(object):
+class TextServing(object):
+    """
+    A base class for text model which supports serving.
+    """
     @serving
-    def get_embedding(self, texts, use_gpu=False):
-        if self.task is not None:
-            raise RuntimeError("The get_embedding method is only valid when task is None, but got task %s" % self.task)
-
-        paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu')
+    def predict_method(
+            self,
+            data: List[List[str]],
+            max_seq_len: int = 128,
+            batch_size: int = 1,
+            use_gpu: bool = False
+    ):
+        """
+        Run predict method as a service.
+        Serving as a task which is specified from serving config.
+        Tasks supported:
+        1. seq-cls: sequence classification;
+        2. token-cls: sequence labeling;
+        3. None: embedding.
+
+        Args:
+            data (obj:`List(List(str))`): The processed data whose each element is the list of a single text or a pair of texts.
+            max_seq_len (:obj:`int`, `optional`, defaults to 128):
+                If set to a number, will limit the total sequence returned so that it has a maximum length.
+            batch_size(obj:`int`, defaults to 1): The number of batch.
+            use_gpu(obj:`bool`, defaults to `False`): Whether to use gpu to run or not.
+
+        Returns:
+            results(obj:`list`): All the predictions labels.
+        """
+        if self.task in self._tasks_supported:    # cls service
+            if self.label_map:
+                # compatible with json decoding label_map
+                self.label_map = {int(k): v for k, v in self.label_map.items()}
+            results = self.predict(data, max_seq_len, batch_size, use_gpu)
+            if self.task == 'token-cls':
+                # remove labels of [CLS] token and pad tokens
+                results = [
+                    token_labels[1:len(data[i][0]) + 1] for i, token_labels in enumerate(results)
+                ]
+            return results
+        elif self.task is None:    # embedding service
+            token_results, sentence_results = self.get_embedding(data, max_seq_len, batch_size, use_gpu)
+            token_results = [
+                token_embeddings[1:len(data[i][0]) + 1] for i, token_embeddings in enumerate(token_results)
+            ]
+            return token_results, sentence_results
+        else:    # unknown service
+            logger.error(
+                f'Unknown task {self.task}, current tasks supported:\n'
+                '1. seq-cls: sequence classification service;\n'
+                '2. token-cls: sequence labeling service;\n'
+                '3. None: embedding service'
+            )
+        return
+
+
+class TransformerModule(RunModule, TextServing):
+    """
+    The base class for Transformer models.
+    """
+    _tasks_supported = [
+        'seq-cls',
+        'token-cls',
+    ]
+
+    def _batchify(self, data: List[List[str]], max_seq_len: int, batch_size: int):
+        def _parse_batch(batch):
+            input_ids = [entry[0] for entry in batch]
+            segment_ids = [entry[1] for entry in batch]
+            return input_ids, segment_ids
 
         tokenizer = self.get_tokenizer()
-        results = []
-        for text in texts:
+
+        examples = []
+        for text in data:
             if len(text) == 1:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=None, pad_to_max_seq_len=False)
+                encoded_inputs = tokenizer.encode(text[0], text_pair=None, max_seq_len=max_seq_len)
             elif len(text) == 2:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=text[1], pad_to_max_seq_len=False)
+                encoded_inputs = tokenizer.encode(text[0], text_pair=text[1], max_seq_len=max_seq_len)
             else:
                 raise RuntimeError(
                     'The input text must have one or two sequence, but got %d. Please check your inputs.' % len(text))
+            examples.append((encoded_inputs['input_ids'], encoded_inputs['segment_ids']))
 
-            input_ids = paddle.to_tensor(encoded_inputs['input_ids']).unsqueeze(0)
-            segment_ids = paddle.to_tensor(encoded_inputs['segment_ids']).unsqueeze(0)
-            sequence_output, pooled_output = self(input_ids, segment_ids)
-
-            sequence_output = sequence_output.squeeze(0)
-            pooled_output = pooled_output.squeeze(0)
-            results.append((sequence_output.numpy().tolist(), pooled_output.numpy().tolist()))
-        return results
-
-
-class TransformerModule(RunModule, EmbeddingServing):
-    _tasks_supported = [
-        'seq-cls',
-        'token-cls',
-    ]
+        # Seperates data into some batches.
+        one_batch = []
+        for example in examples:
+            one_batch.append(example)
+            if len(one_batch) == batch_size:
+                yield _parse_batch(one_batch)
+                one_batch = []
+        if one_batch:
+            # The last batch whose size is less than the config batch_size setting.
+            yield _parse_batch(one_batch)
 
     def training_step(self, batch: List[paddle.Tensor], batch_idx: int):
         """
@@ -408,12 +469,41 @@ class TransformerModule(RunModule, EmbeddingServing):
         predictions, avg_loss, acc = self(input_ids=batch[0], token_type_ids=batch[1], labels=batch[2])
         return {'metrics': {'acc': acc}}
 
-    def predict(self, data, max_seq_len=128, batch_size=1, use_gpu=False):
+    def get_embedding(self, data: List[List[str]], max_seq_len=128, batch_size=1, use_gpu=False):
+        """
+        Get token level embeddings and sentence level embeddings from model.
+
+        Args:
+            data (obj:`List(List(str))`): The processed data whose each element is the list of a single text or a pair of texts.
+            max_seq_len (:obj:`int`, `optional`, defaults to :int:`None`):
+                If set to a number, will limit the total sequence returned so that it has a maximum length.
+            batch_size(obj:`int`, defaults to 1): The number of batch.
+            use_gpu(obj:`bool`, defaults to `False`): Whether to use gpu to run or not.
+
+        Returns:
+            results(obj:`list`): All the tokens and sentences embeddings.
+        """
+        if self.task is not None:
+            raise RuntimeError("The get_embedding method is only valid when task is None, but got task %s" % self.task)
+
+        return self.predict(
+            data=data,
+            max_seq_len=max_seq_len,
+            batch_size=batch_size,
+            use_gpu=use_gpu
+        )
+
+    def predict(
+            self,
+            data: List[List[str]],
+            max_seq_len: int = 128,
+            batch_size: int = 1,
+            use_gpu: bool = False
+    ):
         """
         Predicts the data labels.
 
         Args:
-            data (obj:`List(str)`): The processed data whose each element is the raw text.
+            data (obj:`List(List(str))`): The processed data whose each element is the list of a single text or a pair of texts.
             max_seq_len (:obj:`int`, `optional`, defaults to :int:`None`):
                 If set to a number, will limit the total sequence returned so that it has a maximum length.
             batch_size(obj:`int`, defaults to 1): The number of batch.
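
The get_embedding method added above is now a thin wrapper that checks task is None and delegates to predict. A local-usage sketch follows; the module name and constructor arguments follow typical PaddleHub 2.x usage and are illustrative rather than taken from this diff.

import paddlehub as hub

# Hypothetical module; any TransformerModule-based model loaded with task=None behaves the same way.
model = hub.Module(name='ernie_tiny', task=None)
data = [
    ['今天天气真好'],                    # a single text
    ['这家餐厅怎么样', '服务还不错'],    # a pair of texts
]
# Per the embedding branch added to predict() later in this diff, the call returns a pair of lists:
# results[0] holds token-level embeddings and results[1] sentence-level embeddings,
# each aligned with the entries in `data`.
results = model.get_embedding(data=data, max_seq_len=128, batch_size=2, use_gpu=False)
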
@@ -422,45 +512,22 @@ class TransformerModule(RunModule, EmbeddingServing):
         Returns:
             results(obj:`list`): All the predictions labels.
         """
-        if self.task not in self._tasks_supported:
-            raise RuntimeError("The predict method supports task in {}, but got task {}.".format(
-                self._tasks_supported, self.task))
+        if self.task not in self._tasks_supported \
+                and self.task is not None:    # None for getting embedding
+            raise RuntimeError(
+                f'Unknown task {self.task}, current tasks supported:\n'
+                '1. seq-cls: sequence classification;\n'
+                '2. token-cls: sequence labeling;\n'
+                '3. None: embedding'
+            )
 
         paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu')
 
-        tokenizer = self.get_tokenizer()
-        examples = []
-        for text in data:
-            if len(text) == 1:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=None, max_seq_len=max_seq_len)
-            elif len(text) == 2:
-                encoded_inputs = tokenizer.encode(text[0], text_pair=text[1], max_seq_len=max_seq_len)
-            else:
-                raise RuntimeError(
-                    'The input text must have one or two sequence, but got %d. Please check your inputs.' % len(text))
-            examples.append((encoded_inputs['input_ids'], encoded_inputs['segment_ids']))
-
-        def _batchify_fn(batch):
-            input_ids = [entry[0] for entry in batch]
-            segment_ids = [entry[1] for entry in batch]
-            return input_ids, segment_ids
-
-        # Seperates data into some batches.
-        batches = []
-        one_batch = []
-        for example in examples:
-            one_batch.append(example)
-            if len(one_batch) == batch_size:
-                batches.append(one_batch)
-                one_batch = []
-        if one_batch:
-            # The last batch whose size is less than the config batch_size setting.
-            batches.append(one_batch)
-
+        batches = self._batchify(data, max_seq_len, batch_size)
         results = []
         self.eval()
         for batch in batches:
-            input_ids, segment_ids = _batchify_fn(batch)
+            input_ids, segment_ids = batch
            input_ids = paddle.to_tensor(input_ids)
            segment_ids = paddle.to_tensor(segment_ids)
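
The refactored predict above now dispatches on self.task: classification and sequence-labeling tasks return labels, while task=None falls through to the embedding branch added in the next hunk. A local-usage sketch for the classification path follows; the module name, checkpoint path, and label_map are illustrative placeholders in the style of the PaddleHub 2.x text-classification demos, not values from this diff.

import paddlehub as hub

# Illustrative fine-tuned sequence classification module and label map.
label_map = {0: 'negative', 1: 'positive'}
model = hub.Module(
    name='ernie_tiny',
    task='seq-cls',
    load_checkpoint='./ckpt/best_model/model.pdparams',
    label_map=label_map,
)
data = [['这家餐厅的菜品很一般'], ['服务态度非常好']]
# Returns one predicted label per input text.
results = model.predict(data, max_seq_len=128, batch_size=1, use_gpu=False)
print(results)
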
@@ -476,5 +543,11 @@ class TransformerModule(RunModule, EmbeddingServing):
                 batch_ids = batch_ids.tolist()
                 token_labels = [[self.label_map[i] for i in token_ids] for token_ids in batch_ids]
                 results.extend(token_labels)
+            elif self.task == None:
+                if not results:
+                    results = [[], []]
+                sequence_output, pooled_output = self(input_ids, segment_ids)
+                results[0].extend(sequence_output.numpy().tolist())    # token-level embedding
+                results[1].extend(pooled_output.numpy().tolist())    # sentence-level embedding
 
         return results
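
Because predict_method is decorated with @serving, it becomes the entry point invoked by PaddleHub Serving once the module is deployed (for example via `hub serving start -m ernie_tiny`). The request sketch below assumes PaddleHub Serving's default port 8866, its /predict/<module_name> route, and that the JSON body is forwarded as keyword arguments to the serving method; these details follow PaddleHub's serving conventions, the module name is illustrative, and none of it is part of this diff.

import json

import requests

# Assumed default PaddleHub Serving address plus an illustrative module name.
url = 'http://127.0.0.1:8866/predict/ernie_tiny'
payload = {
    'data': [['今天天气真好'], ['这家餐厅怎么样', '服务还不错']],
    'max_seq_len': 128,
    'batch_size': 1,
}
headers = {'Content-Type': 'application/json'}
response = requests.post(url=url, headers=headers, data=json.dumps(payload))
# For a seq-cls/token-cls module the response carries predicted labels;
# for task=None it carries token-level and sentence-level embeddings.
print(response.json())
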