executor.py 7.8 KB
Newer Older
K
KP 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
K
KP 已提交
14
import logging
K
KP 已提交
15
import os
K
KP 已提交
16
import sys
K
KP 已提交
17 18
from abc import ABC
from abc import abstractmethod
K
KP 已提交
19
from collections import OrderedDict
K
KP 已提交
20
from typing import Any
K
KP 已提交
21
from typing import Dict
K
KP 已提交
22
from typing import List
K
KP 已提交
23 24 25 26
from typing import Union

import paddle

K
KP 已提交
27
from .log import logger
小湉湉's avatar
小湉湉 已提交
28 29
from .utils import download_and_decompress
from .utils import MODEL_HOME
K
KP 已提交
30

K
KP 已提交
31 32 33 34 35 36 37

class BaseExecutor(ABC):
    """
        An abstract executor of paddlespeech tasks.
    """

    def __init__(self):
K
KP 已提交
38 39
        self._inputs = OrderedDict()
        self._outputs = OrderedDict()
小湉湉's avatar
小湉湉 已提交
40 41
        self.pretrained_models = OrderedDict()
        self.model_alias = OrderedDict()
K
KP 已提交
42 43

    @abstractmethod
K
KP 已提交
44
    def _init_from_path(self, *args, **kwargs):
K
KP 已提交
45
        """
K
KP 已提交
46
        Init model and other resources from arguments. This method should be called by `__call__()`.
K
KP 已提交
47 48 49 50
        """
        pass

    @abstractmethod
K
KP 已提交
51
    def preprocess(self, input: Any, *args, **kwargs):
K
KP 已提交
52
        """
K
KP 已提交
53 54 55 56 57
        Input preprocess and return paddle.Tensor stored in self._inputs.
        Input content can be a text(tts), a file(asr, cls), a stream(not supported yet) or anything needed.

        Args:
            input (Any): Input text/file/stream or other content.
K
KP 已提交
58 59 60 61 62
        """
        pass

    @paddle.no_grad()
    @abstractmethod
    def infer(self, *args, **kwargs):
        """
        Run the model and store what it produces in self._outputs.

        Implementations read their input tensors from self._inputs and write
        the resulting tensors into self._outputs. The base implementation
        does nothing and returns `None`.
        """

    @abstractmethod
K
KP 已提交
71
    def postprocess(self, *args, **kwargs) -> Union[str, os.PathLike]:
K
KP 已提交
72
        """
K
KP 已提交
73 74 75 76 77
        Output postprocess and return results.
        This method get model output from self._outputs and convert it into human-readable results.

        Returns:
            Union[str, os.PathLike]: Human-readable results such as texts and audio files.
K
KP 已提交
78 79
        """
        pass
K
KP 已提交
80 81 82 83

    @abstractmethod
    def execute(self, argv: List[str]) -> bool:
        """
K
KP 已提交
84 85 86 87 88 89 90
        Command line entry. This method can only be accessed by a command line such as `paddlespeech asr`.

        Args:
            argv (List[str]): Arguments from command line.

        Returns:
            int: Result of the command execution. `True` for a success and `False` for a failure.
K
KP 已提交
91 92 93 94 95 96
        """
        pass

    @abstractmethod
    def __call__(self, *arg, **kwargs):
        """
K
KP 已提交
97
        Python API to call an executor.
K
KP 已提交
98 99
        """
        pass
K
KP 已提交
100

K
KP 已提交
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
    def get_task_source(self, input_: Union[str, os.PathLike, None]
                        ) -> Dict[str, Union[str, os.PathLike]]:
        """
        Get task input source from command line input.

        Args:
            input_ (Union[str, os.PathLike, None]): Input from command line.

        Returns:
            Dict[str, Union[str, os.PathLike]]: A dict with ids and inputs.
        """
        if self._is_job_input(input_):
            ret = self._get_job_contents(input_)
        else:
            ret = OrderedDict()

            if input_ is None:  # Take input from stdin
                for i, line in enumerate(sys.stdin):
                    line = line.strip()
                    if len(line.split(' ')) == 1:
                        ret[str(i + 1)] = line
                    elif len(line.split(' ')) == 2:
                        id_, info = line.split(' ')
                        ret[id_] = info
                    else:  # No valid input info from one line.
                        continue
            else:
                ret[1] = input_
        return ret

    def process_task_results(self,
                             input_: Union[str, os.PathLike, None],
                             results: Dict[str, os.PathLike],
                             job_dump_result: bool=False):
        """
        Handling task results and redirect stdout if needed.

        Args:
            input_ (Union[str, os.PathLike, None]): Input from command line.
            results (Dict[str, os.PathLike]): Task outputs.
            job_dump_result (bool, optional): if True, dumps job results into file. Defaults to False.
        """

K
KP 已提交
144 145 146 147 148 149 150
        if not self._is_job_input(input_) and len(
                results) == 1:  # Only one input sample
            raw_text = list(results.values())[0]
        else:
            raw_text = self._format_task_results(results)

        print(raw_text, end='')  # Stdout
K
KP 已提交
151

K
KP 已提交
152 153
        if self._is_job_input(
                input_) and job_dump_result:  # Dump to *.job.done 
K
KP 已提交
154 155 156 157 158 159 160 161
            try:
                job_output_file = os.path.abspath(input_) + '.done'
                sys.stdout = open(job_output_file, 'w')
                print(raw_text, end='')
                logger.info(f'Results had been saved to: {job_output_file}')
            finally:
                sys.stdout.close()

K
KP 已提交
162 163 164 165 166 167 168 169 170 171
    def _is_job_input(self, input_: Union[str, os.PathLike]) -> bool:
        """
        Check if current input file is a job input or not.

        Args:
            input_ (Union[str, os.PathLike]): Input file of current task.

        Returns:
            bool: return `True` for job input, `False` otherwise.
        """
K
KP 已提交
172 173
        return input_ and os.path.isfile(input_) and (input_.endswith('.job') or
                                                      input_.endswith('.txt'))
K
KP 已提交
174

K
KP 已提交
175 176
    def _get_job_contents(
            self, job_input: os.PathLike) -> Dict[str, Union[str, os.PathLike]]:
K
KP 已提交
177 178 179 180 181 182 183 184 185
        """
        Read a job input file and return its contents in a dictionary.

        Args:
            job_input (os.PathLike): The job input file.

        Returns:
            Dict[str, str]: Contents of job input.
        """
K
KP 已提交
186
        job_contents = OrderedDict()
K
KP 已提交
187 188 189 190 191 192 193 194 195
        with open(job_input) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                k, v = line.split(' ')
                job_contents[k] = v
        return job_contents

K
KP 已提交
196 197
    def _format_task_results(
            self, results: Dict[str, Union[str, os.PathLike]]) -> str:
K
KP 已提交
198
        """
K
KP 已提交
199
        Convert task results to raw text.
K
KP 已提交
200 201

        Args:
K
KP 已提交
202
            results (Dict[str, str]): A dictionary of task results.
K
KP 已提交
203 204

        Returns:
K
KP 已提交
205
            str: A string object contains task results.
K
KP 已提交
206 207
        """
        ret = ''
K
KP 已提交
208
        for k, v in results.items():
K
KP 已提交
209 210
            ret += f'{k} {v}\n'
        return ret
K
KP 已提交
211 212 213 214 215 216 217 218 219 220

    def disable_task_loggers(self):
        """
        Disable all loggers in current task.
        """
        loggers = [
            logging.getLogger(name) for name in logging.root.manager.loggerDict
        ]
        for l in loggers:
            l.disabled = True
小湉湉's avatar
小湉湉 已提交
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237

    def _get_pretrained_path(self, tag: str) -> os.PathLike:
        """
        Download and returns pretrained resources path of current task.
        """
        support_models = list(self.pretrained_models.keys())
        assert tag in self.pretrained_models, 'The model "{}" you want to use has not been supported, please choose other models.\nThe support models includes:\n\t\t{}\n'.format(
            tag, '\n\t\t'.join(support_models))

        res_path = os.path.join(MODEL_HOME, tag)
        decompressed_path = download_and_decompress(self.pretrained_models[tag],
                                                    res_path)
        decompressed_path = os.path.abspath(decompressed_path)
        logger.info(
            'Use pretrained model stored in: {}'.format(decompressed_path))

        return decompressed_path