# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
import os
16 17
import sys
import traceback
18
import linecache
19
import re
20
import numpy as np  # noqa: F401
21

22
from .origin_info import (
23 24 25 26
    Location,
    OriginInfo,
    global_origin_info_map,
)
27 28 29 30 31
from .utils import _is_api_in_module_helper  # noqa: F401
from .utils import RE_PYMODULE


__all__ = []
32 33 34

# Attribute name under which an ``ErrorData`` instance is stored on an
# exception (see ``attach_error_data`` and ``ErrorData.create_exception``).
# It contains spaces, so it can only be accessed via ``setattr``/``getattr``.
ERROR_DATA = "Error data about original source code information and traceback."

# Environment variable that sets whether to display the simplified error stack
# for errors raised in runtime (read in ``ErrorData.create_message``).
SIMPLIFY_ERROR_ENV_NAME = "TRANSLATOR_SIMPLIFY_NEW_ERROR"
DEFAULT_SIMPLIFY_NEW_ERROR = 1

# Environment variable that sets whether to disable the dygraph2static error
# reporting module and re-raise the original error instead
# (read in ``ErrorData.raise_new_exception``).
DISABLE_ERROR_ENV_NAME = "TRANSLATOR_DISABLE_NEW_ERROR"
DEFAULT_DISABLE_NEW_ERROR = 0

# Number of source lines shown around the error line by ``TraceBackFrameRange``.
SOURCE_CODE_RANGE = 5
# Number of blanks placed before the 'File "..."' line of each traceback frame.
BLANK_COUNT_BEFORE_FILE_STR = 4

46

47
def attach_error_data(error, in_runtime=False):
    """
    Attaches error data about original source code information and traceback to an error.

    The exception type, value and traceback are taken from `sys.exc_info()`,
    not from `error` itself, so this is expected to be called while the
    exception is being handled.

    Args:
        error(Exception): A native error.
        in_runtime(bool): `error` is raised in runtime if in_runtime is True, otherwise in compile time
    Returns:
        The same `error` object, with an `ErrorData` instance stored under the
        attribute named by `ERROR_DATA`.
    """

    e_type, e_value, e_traceback = sys.exc_info()
    # The outermost frame is dropped.
    # NOTE(review): presumably it is the framework frame that caught the
    # error -- confirm against the callers.
    tb = traceback.extract_tb(e_traceback)[1:]

    error_data = ErrorData(e_type, e_value, tb, global_origin_info_map)
    error_data.in_runtime = in_runtime

    setattr(error, ERROR_DATA, error_data)

    return error


class TraceBackFrame(OriginInfo):
    """
    Traceback frame information.

    Stores the location, function name and one line of source code of a
    single stack frame and renders them like an entry of a Python traceback.
    """

    def __init__(self, location, function_name, source_code):
        # NOTE(review): the base-class initializer is intentionally not
        # called here; the attributes are assigned directly.
        self.location = location
        self.function_name = function_name
        self.source_code = source_code
        # Always empty for a single-line frame; only TraceBackFrameRange
        # fills in the text of the failing line.
        self.error_line = ''

    def formated_message(self):
        """
        Returns the frame as 'File "<path>", line <n>, in <func>' followed by
        a newline, a tab and the source code with leading whitespace removed.
        """
        # self.source_code may be empty in some functions.
        # For example, decorator generated function
        code = self.source_code
        if isinstance(code, str):
            code = code.lstrip()
        return (
            ' ' * BLANK_COUNT_BEFORE_FILE_STR
            + 'File "{}", line {}, in {}\n\t{}'.format(
                self.location.filepath,
                self.location.lineno,
                self.function_name,
                code,
            )
        )
94

95

96 97 98 99 100 101 102 103 104
class TraceBackFrameRange(OriginInfo):
    """
    Traceback frame information with a range of surrounding source code.

    On construction, up to `SOURCE_CODE_RANGE` lines around
    `location.lineno` are read through `linecache`, a '~~~ <--- HERE' hint
    line is inserted below the error line, and the lines are re-indented
    relative to the least indented one.
    """

    def __init__(self, location, function_name):
        self.location = location
        self.function_name = function_name
        self.source_code = []
        self.error_line = ''
        # Leading-blank count of every entry of self.source_code;
        # -1 marks an empty line.
        blank_count = []
        begin_lineno = max(1, self.location.lineno - SOURCE_CODE_RANGE // 2)

        for i in range(begin_lineno, begin_lineno + SOURCE_CODE_RANGE):
            # linecache.getline returns '' for lines it cannot read.
            line = linecache.getline(self.location.filepath, i).rstrip('\n')
            line_lstrip = line.lstrip()
            self.source_code.append(line_lstrip)
            if not line_lstrip:  # empty line from source code
                blank_count.append(-1)
            else:
                blank_count.append(len(line) - len(line_lstrip))

            if i == self.location.lineno:
                self.error_line = line_lstrip
                hint_msg = '~' * len(line_lstrip) + ' <--- HERE'
                self.source_code.append(hint_msg)
                # The hint line is indented like the line it points at.
                blank_count.append(blank_count[-1])
        linecache.clearcache()
        # remove top and bottom empty line in source code
        while len(self.source_code) > 0 and not self.source_code[0]:
            self.source_code.pop(0)
            blank_count.pop(0)
        while len(self.source_code) > 0 and not self.source_code[-1]:
            self.source_code.pop(-1)
            blank_count.pop(-1)

        # `default=0` keeps construction from failing with ValueError when no
        # real source line is available (e.g. the file cannot be read), in
        # which case only the hint line is left.
        min_blank_count = min((c for c in blank_count if c >= 0), default=0)
        for i, code_line in enumerate(self.source_code):
            # if code_line is an empty line between two code lines, don't add blanks
            if code_line:
                indent = (
                    blank_count[i]
                    - min_blank_count
                    + BLANK_COUNT_BEFORE_FILE_STR * 2
                )
                self.source_code[i] = ' ' * indent + code_line

    def formated_message(self):
        """
        Returns the 'File "<path>", line <n>, in <func>' header followed by
        the collected source lines, one per line.
        """
        msg = (
            ' ' * BLANK_COUNT_BEFORE_FILE_STR
            + 'File "{}", line {}, in {}\n'.format(
                self.location.filepath, self.location.lineno, self.function_name
            )
        )
        # the range of source code follows the header line
        return msg + '\n'.join(self.source_code)
155 156


157
class SuggestionDict:
    """
    Read-only mapping from a tuple of keywords to a tuple of revise suggestions.

    A suggestion group applies to an error message when every keyword of its
    key occurs in that message (see `ErrorData._create_revise_suggestion`).
    """

    def __init__(self):
        # {(keywords): (suggestions)}
        self.suggestion_dict = {
            ('is not initialized.', 'Hint:', 'IsInitialized'): (
                "Please ensure all your sublayers are inheritted from nn.Layer.",
                "Please ensure there is no tensor created explicitly depended on external data, we suggest to register it as buffer tensor. See https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/04_dygraph_to_static/export_model/principle_cn.html#parameters-buffers for details",
            )
        }

    def keys(self):
        """Returns the keyword tuples that have suggestions."""
        return self.suggestion_dict.keys()

    def __getitem__(self, key):
        """Returns the suggestions registered for the keyword tuple `key`."""
        return self.suggestion_dict[key]


174 175 176 177
class Dy2StKeyError(Exception):
    """
    Exception created by `ErrorData.create_exception` in place of `KeyError`.

    NOTE(review): presumably used because `KeyError` renders its single
    argument with `repr()`, which would escape the newlines of the
    multi-line message -- confirm.
    """


178
class ErrorData:
    """
    Error data attached to an exception which is raised in un-transformed code.

    It keeps the original exception type/value/traceback together with the
    origin info map and builds a new exception whose message shows the
    traceback in terms of the user's original dygraph source code.
    """

    def __init__(
        self, error_type, error_value, origin_traceback, origin_info_map
    ):
        """
        Args:
            error_type(type): Type of the original exception.
            error_value(Exception): The original exception value.
            origin_traceback(list): Traceback frames; each one unpacks to
                (filepath, lineno, funcname, code).
            origin_info_map(dict): Maps (filepath, lineno) to the origin
                info of the user's source code.
        """
        self.error_type = error_type
        self.error_value = error_value
        self.origin_traceback = origin_traceback
        self.origin_info_map = origin_info_map
        self.in_runtime = False
        self.suggestion_dict = SuggestionDict()

    def create_exception(self):
        """
        Creates a new exception carrying the custom message, with this
        `ErrorData` attached under the attribute named by `ERROR_DATA`.

        Returns:
            An instance of `self.error_type`, or of `Dy2StKeyError` when the
            original type is exactly `KeyError`.
        """
        message = self.create_message()
        if self.error_type is KeyError:
            new_exception = Dy2StKeyError(message)
        else:
            new_exception = self.error_type(message)
        setattr(new_exception, ERROR_DATA, self)
        return new_exception

    def numpy_api_check(self, format_exception, error_line):
        """
        Replaces the message of a TypeError caused by calling a numpy API.

        Args:
            format_exception(list[str]): Lines of the formatted exception.
            error_line(str): Source text of the line that raised the error.

        Returns:
            A two-line hint asking to use the Paddle API if the function
            called on `error_line` is a numpy API, otherwise
            `format_exception` unchanged.
        """
        if self.error_type is not TypeError:
            return format_exception

        # Find the (possibly module-qualified) name of the first traceback
        # frame's function that appears in the error line.
        func_str = None
        for frame in self.origin_traceback:
            searched_name = re.search(
                r'({module})*{name}'.format(
                    module=RE_PYMODULE, name=frame.name
                ),
                error_line,
            )
            if searched_name:
                func_str = searched_name.group(0)
                break

        # Nothing to look up: skip the eval below entirely.
        if func_str is None:
            return format_exception

        try:
            # NOTE(review): `func_str` is text taken from the user's source
            # line and is evaluated in this module's namespace; this is only
            # acceptable as long as that source is trusted.
            module_result = eval(
                "_is_api_in_module_helper({}, '{}')".format(func_str, "numpy")
            )
            is_numpy_api_err = module_result or func_str.startswith(
                ("numpy.", "np.")
            )
        except Exception:
            # Best effort: any failure to resolve the name means "not numpy".
            is_numpy_api_err = False

        if is_numpy_api_err and func_str:
            return [
                "TypeError: Code '{}' called numpy API {}, please use Paddle API to replace it.".format(
                    error_line, func_str
                ),
                "           values will be changed to variables by dy2static, numpy api can not handle variables",
            ]
        return format_exception

    def create_message(self):
        """
        Creates a custom error message which includes trace stack with source code information of dygraph from user.
        """
        message_lines = []

        # Step1: Adds header message to prompt users that the following is the original information.
        header_message = "In transformed code:"
        message_lines.append(header_message)
        message_lines.append("")
        error_line = None

        # Simplify error value to improve readability if error is raised in runtime
        if self.in_runtime:
            if int(
                os.getenv(SIMPLIFY_ERROR_ENV_NAME, DEFAULT_SIMPLIFY_NEW_ERROR)
            ):
                self._simplify_error_value()
            message_lines.append(str(self.error_value))
            return '\n'.join(message_lines)

        # Step2: Optimizes stack information with source code information of dygraph from user.
        # Collect (index in traceback, origin info) of every frame that maps
        # back to user code.
        user_frames = []
        for i, (filepath, lineno, _funcname, _code) in enumerate(
            self.origin_traceback
        ):
            dygraph_func_info = self.origin_info_map.get(
                (filepath, lineno), None
            )
            if dygraph_func_info:
                user_frames.append((i, dygraph_func_info))

        # Add user code traceback; only the last user frame shows a range of
        # source code around the error line.
        last_pos = len(user_frames) - 1
        for pos, (_, dygraph_func_info) in enumerate(user_frames):
            if pos == last_pos:
                traceback_frame = TraceBackFrameRange(
                    dygraph_func_info.location, dygraph_func_info.function_name
                )
            else:
                traceback_frame = TraceBackFrame(
                    dygraph_func_info.location,
                    dygraph_func_info.function_name,
                    dygraph_func_info.source_code,
                )

            message_lines.append(traceback_frame.formated_message())
            error_line = traceback_frame.error_line
        message_lines.append("")

        # Add paddle traceback after user code traceback
        paddle_traceback_start_index = (
            user_frames[-1][0] + 1 if user_frames else 0
        )
        for filepath, lineno, funcname, code in self.origin_traceback[
            paddle_traceback_start_index:
        ]:
            traceback_frame = TraceBackFrame(
                Location(filepath, lineno), funcname, code
            )
            message_lines.append(traceback_frame.formated_message())
        message_lines.append("")

        # Step3: Adds error message like "TypeError: dtype must be int32, but received float32".
        # NOTE: `format_exception` is a list, its length is 1 in most cases, but sometimes its length
        # is greater than 1, for example, the error_type is IndentationError.
        format_exception = traceback.format_exception_only(
            self.error_type, self.error_value
        )
        if error_line is not None:
            format_exception = self.numpy_api_check(
                format_exception, error_line
            )

        error_message = [
            " " * BLANK_COUNT_BEFORE_FILE_STR + line
            for line in format_exception
        ]
        message_lines.extend(error_message)

        return '\n'.join(message_lines)

    def _create_revise_suggestion(self, bottom_error_message):
        """
        Builds the 'Revise suggestion' section for a runtime error.

        Args:
            bottom_error_message(list[str]): Lines of the bottom error message.

        Returns:
            A list of lines (an empty line, the section title and the
            numbered suggestions), or an empty list when no suggestion
            matches.
        """
        revise_suggestions = [
            '',
            ' ' * BLANK_COUNT_BEFORE_FILE_STR + 'Revise suggestion: ',
        ]
        error_text = ''.join(bottom_error_message)
        for keywords in self.suggestion_dict.keys():
            # all keywords should be in bottom_error_message
            if all(keyword in error_text for keyword in keywords):
                for suggestion in self.suggestion_dict[keywords]:
                    # The two leading entries are the blank line and the
                    # title, so numbering starts from 1.
                    suggestion_msg = (
                        ' ' * BLANK_COUNT_BEFORE_FILE_STR * 2
                        + '{}. {}'.format(
                            str(len(revise_suggestions) - 1), suggestion
                        )
                    )
                    revise_suggestions.append(suggestion_msg)
        return revise_suggestions if len(revise_suggestions) > 2 else []

    def _simplify_error_value(self):
        """
        Simplifies error value to improve readability if error is raised in runtime.

        The text of `self.error_value` is parsed, the traceback is rebuilt
        around the user's code and `self.error_value` is replaced by a new
        instance of `self.error_type` holding the simplified text. If the
        text does not have the expected layout, `self.error_value` is left
        unchanged.

        NOTE(liym27): The op callstack information about transformed static code has been replaced with original dygraph code.

        TODO(liym27):
            1. Need a more robust way because the code of start_trace may change.
            2. Set the switch to determine whether to simplify error_value
        """

        assert self.in_runtime is True

        error_value_lines = str(self.error_value).split("\n")
        error_value_lines_strip = [mes.lstrip(" ") for mes in error_value_lines]

        start_trace = "outputs = static_func(*inputs)"
        try:
            start_idx = error_value_lines_strip.index(start_trace)
        except ValueError:
            # The marker line is missing: keep the original error value
            # rather than masking it with an unrelated ValueError.
            return

        error_value_lines = error_value_lines[start_idx + 1 :]
        error_value_lines_strip = error_value_lines_strip[start_idx + 1 :]

        # use empty line to locate the bottom_error_message
        try:
            empty_line_idx = error_value_lines_strip.index('')
        except ValueError:
            return
        bottom_error_message = error_value_lines[empty_line_idx + 1 :]
        revise_suggestion = self._create_revise_suggestion(bottom_error_message)

        error_traceback = []
        user_code_traceback_index = []
        pattern = 'File "(?P<filepath>.+)", line (?P<lineno>.+), in (?P<function_name>.+)'

        # Distinguish user code and framework code using static_info_map
        static_info_map = {}
        for k, v in self.origin_info_map.items():
            origin_filepath = v.location.filepath
            origin_lineno = v.location.lineno
            static_info_map[(origin_filepath, origin_lineno)] = k

        # Frames are expected as pairs of lines: a 'File ...' line followed
        # by the source code line, hence the step of 2.
        for i in range(0, len(error_value_lines_strip), 2):
            if error_value_lines_strip[i].startswith("File "):
                re_result = re.search(pattern, error_value_lines_strip[i])
                if re_result is None:
                    # Not a well-formed frame line; skip it.
                    continue
                tmp_filepath, lineno_str, function_name = re_result.groups()
                code = (
                    error_value_lines_strip[i + 1]
                    if i + 1 < len(error_value_lines_strip)
                    else ''
                )

                if static_info_map.get((tmp_filepath, int(lineno_str))):
                    user_code_traceback_index.append(len(error_traceback))

                error_traceback.append(
                    (tmp_filepath, int(lineno_str), function_name, code)
                )

        error_frame = []
        # Add user code traceback
        for i in user_code_traceback_index:
            filepath, lineno, funcname, code = error_traceback[i]
            if i == user_code_traceback_index[-1]:
                traceback_frame = TraceBackFrameRange(
                    Location(filepath, lineno), funcname
                )
            else:
                traceback_frame = TraceBackFrame(
                    Location(filepath, lineno), funcname, code
                )
            error_frame.append(traceback_frame.formated_message())
        error_frame.append("")

        # Add paddle traceback after user code traceback
        paddle_traceback_start_index = (
            user_code_traceback_index[-1] + 1
            if user_code_traceback_index
            else 0
        )
        for filepath, lineno, funcname, code in error_traceback[
            paddle_traceback_start_index:
        ]:
            traceback_frame = TraceBackFrame(
                Location(filepath, lineno), funcname, code
            )
            error_frame.append(traceback_frame.formated_message())
        error_frame.append("")

        error_frame.extend(bottom_error_message)
        error_frame.extend(revise_suggestion)
        error_value_str = '\n'.join(error_frame)
        self.error_value = self.error_type(error_value_str)

    def raise_new_exception(self):
        """
        Raises the exception built by `create_exception`, or re-raises the
        exception currently being handled when the environment variable
        named by `DISABLE_ERROR_ENV_NAME` is set to a non-zero integer.
        """
        # Raises the origin error if the dygraph2static error module is disabled.
        if int(os.getenv(DISABLE_ERROR_ENV_NAME, DEFAULT_DISABLE_NEW_ERROR)):
            raise

        new_exception = self.create_exception()
        # NOTE(liym27):
        # Why `raise new_exception from None`?
        #
        # In Python 3, by default, a new exception is raised with trace information of the caught exception.
        # This only raises new_exception and hides unwanted implementation details from tracebacks of the
        # caught exception.

        raise new_exception from None