error.py 16.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
import os
16 17
import sys
import traceback
18
import linecache
19
import re
20
import numpy as np
21

22 23 24 25 26 27 28 29 30
from paddle.fluid.dygraph.dygraph_to_static.origin_info import (
    Location,
    OriginInfo,
    global_origin_info_map,
)
from paddle.fluid.dygraph.dygraph_to_static.utils import (
    _is_api_in_module_helper,
    RE_PYMODULE,
)
31 32 33

# Attribute name under which an `ErrorData` instance is stored on an exception
# object (used with `setattr` in this module).
ERROR_DATA = "Error data about original source code information and traceback."

# A flag to set whether to display the simplified error stack.
# Read in `ErrorData.create_message`; a non-zero value enables simplification.
SIMPLIFY_ERROR_ENV_NAME = "TRANSLATOR_SIMPLIFY_NEW_ERROR"
DEFAULT_SIMPLIFY_NEW_ERROR = 1

# A flag to set whether to disable the dygraph2static error reporting module.
# Read in `ErrorData.raise_new_exception`; a non-zero value re-raises the
# original error untouched.
DISABLE_ERROR_ENV_NAME = "TRANSLATOR_DISABLE_NEW_ERROR"
DEFAULT_DISABLE_NEW_ERROR = 0

# Number of source lines fetched around the failing line by TraceBackFrameRange.
SOURCE_CODE_RANGE = 5
# Number of spaces placed in front of every 'File "...", line ...' entry.
BLANK_COUNT_BEFORE_FILE_STR = 4

45

46
def attach_error_data(error, in_runtime=False):
    """
    Attaches error data about original source code information and traceback to an error.

    The data is built from the exception that is currently being handled, as
    reported by `sys.exc_info()`.

    Args:
        error(Exception): A native error.
        in_runtime(bool): `error` is raised in runtime if in_runtime is True, otherwise in compile time.
    Returns:
        The same `error` object, with an `ErrorData` instance stored on it
        under the attribute name held in `ERROR_DATA`.
    """
    exc_type, exc_value, exc_tb = sys.exc_info()
    # The first (outermost) extracted traceback entry is skipped.
    frames = traceback.extract_tb(exc_tb)[1:]

    data = ErrorData(exc_type, exc_value, frames, global_origin_info_map)
    data.in_runtime = in_runtime
    setattr(error, ERROR_DATA, data)
    return error


class TraceBackFrame(OriginInfo):
    """
    One traceback entry: its location, the enclosing function name and the
    single line of source code that belongs to it.
    """

    def __init__(self, location, function_name, source_code):
        self.location, self.function_name = location, function_name
        self.source_code = source_code
        # Always empty for a plain frame.
        self.error_line = ''

    def formated_message(self):
        """
        Returns the frame rendered as 'File "<path>", line <n>, in <func>',
        followed by a newline, a tab and the source code.
        """
        # self.source_code may be empty in some functions
        # (for example, decorator generated function), so only real strings
        # get their leading whitespace stripped.
        code = self.source_code
        if isinstance(code, str):
            code = code.lstrip()

        indent = ' ' * BLANK_COUNT_BEFORE_FILE_STR
        where = self.location
        return indent + 'File "{}", line {}, in {}\n\t{}'.format(
            where.filepath, where.lineno, self.function_name, code
        )
93

94

95 96 97 98 99 100 101 102 103
class TraceBackFrameRange(OriginInfo):
    """
    Traceback frame information that carries a window of source code.

    On construction, SOURCE_CODE_RANGE lines around `location.lineno` are read
    from `location.filepath` through `linecache`, a `~~~ <--- HERE` marker line
    is inserted right after the line at `location.lineno`, and the window is
    re-indented relative to its least-indented line.
    """

    def __init__(self, location, function_name):
        """
        Args:
            location: object exposing `filepath` and `lineno`.
            function_name(str): name of the function the frame belongs to.
        """
        self.location = location
        self.function_name = function_name
        self.source_code = []
        self.error_line = ''
        # Leading-blank count of every entry in self.source_code;
        # -1 marks an empty source line.
        blank_count = []
        begin_lineno = max(1, self.location.lineno - SOURCE_CODE_RANGE // 2)

        for i in range(begin_lineno, begin_lineno + SOURCE_CODE_RANGE):
            line = linecache.getline(self.location.filepath, i).rstrip('\n')
            line_lstrip = line.lstrip()
            self.source_code.append(line_lstrip)
            if not line_lstrip:  # empty line from source code
                blank_count.append(-1)
            else:
                blank_count.append(len(line) - len(line_lstrip))

            if i == self.location.lineno:
                self.error_line = line_lstrip
                hint_msg = '~' * len(line_lstrip) + ' <--- HERE'
                self.source_code.append(hint_msg)
                # The marker is indented like the line it points at.
                blank_count.append(blank_count[-1])
        linecache.clearcache()

        # remove top and bottom empty line in source code
        while self.source_code and not self.source_code[0]:
            self.source_code.pop(0)
            blank_count.pop(0)
        while self.source_code and not self.source_code[-1]:
            self.source_code.pop()
            blank_count.pop()

        # `default=0` covers the case where no line could be read at all
        # (linecache.getline returns '' on any error); a bare min() over an
        # empty sequence would raise ValueError while an error is being reported.
        min_blank_count = min((c for c in blank_count if c >= 0), default=0)
        for i, code in enumerate(self.source_code):
            # if source_code[i] is empty line between two code line, dont add blank
            if code:
                # Clamp so that the -1 "empty line" sentinel carried by a marker
                # under an empty line never yields a negative relative indent.
                relative_indent = max(blank_count[i] - min_blank_count, 0)
                self.source_code[i] = (
                    ' ' * (relative_indent + BLANK_COUNT_BEFORE_FILE_STR * 2)
                    + code
                )

    def formated_message(self):
        """
        Returns the 'File "<path>", line <n>, in <func>' header followed by the
        collected source lines, one per line.
        """
        msg = (
            ' ' * BLANK_COUNT_BEFORE_FILE_STR
            + 'File "{}", line {}, in {}\n'.format(
                self.location.filepath, self.location.lineno, self.function_name
            )
        )
        # The source window follows the header directly.
        return msg + '\n'.join(self.source_code)
154 155


156 157 158 159
class SuggestionDict(object):
    """
    Read-only table of revise suggestions.

    Each key is a tuple of keywords and each value is a tuple of suggestion
    strings associated with those keywords.
    """

    def __init__(self):
        uninitialized_keywords = (
            'is not initialized.',
            'Hint:',
            'IsInitialized',
        )
        uninitialized_suggestions = (
            "Please ensure all your sublayers are inheritted from nn.Layer.",
            "Please ensure there is no tensor created explicitly depended on external data, we suggest to register it as buffer tensor. See https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/04_dygraph_to_static/export_model/principle_cn.html#parameters-buffers for details",
        )
        # {(keywords): (suggestions)}
        self.suggestion_dict = {
            uninitialized_keywords: uninitialized_suggestions,
        }

    def keys(self):
        """Returns a view of all keyword tuples."""
        table = self.suggestion_dict
        return table.keys()

    def __getitem__(self, key):
        """Returns the suggestions stored for `key`; raises KeyError if absent."""
        table = self.suggestion_dict
        return table[key]


173 174 175 176
class Dy2StKeyError(Exception):
    """
    Stand-in exception type used instead of the built-in KeyError.

    NOTE(review): presumably introduced because KeyError renders its message
    with repr(), which would escape the newlines of a multi-line message --
    confirm with the original author.
    """


177 178 179 180 181
class ErrorData(object):
    """
    Error data attached to an exception which is raised in un-transformed code.

    Holds the original exception type/value, the extracted traceback entries
    and the map used to translate transformed-code locations back to the
    user's original code, and builds a new exception with a rewritten message.
    """

    def __init__(
        self, error_type, error_value, origin_traceback, origin_info_map
    ):
        """
        Args:
            error_type(type): class of the original exception.
            error_value(Exception): the original exception instance.
            origin_traceback(list): traceback entries; each one unpacks to
                (filepath, lineno, funcname, code).
            origin_info_map(dict): maps a (filepath, lineno) key to an object
                exposing `location`, `function_name` and `source_code`.
        """
        self.error_type = error_type
        self.error_value = error_value
        self.origin_traceback = origin_traceback
        self.origin_info_map = origin_info_map
        self.in_runtime = False
        self.suggestion_dict = SuggestionDict()

    def create_exception(self):
        """
        Builds a new exception whose message is `create_message()`.

        The new exception has the original type, except that KeyError is
        replaced by Dy2StKeyError. This ErrorData is stored on the new
        exception under the attribute name held in `ERROR_DATA`.
        """
        message = self.create_message()
        if self.error_type is KeyError:
            new_exception = Dy2StKeyError(message)
        else:
            new_exception = self.error_type(message)
        setattr(new_exception, ERROR_DATA, self)
        return new_exception

    def numpy_api_check(self, format_exception, error_line):
        """
        Replaces the formatted exception with a numpy-specific hint when a
        TypeError appears to come from a numpy API called in `error_line`.

        Args:
            format_exception(list[str]): lines from `traceback.format_exception_only`.
            error_line(str): source code of the failing user line.
        Returns:
            Either `format_exception` unchanged, or a two-line numpy hint.
        """
        if self.error_type is not TypeError:
            return format_exception

        # Look for the (optionally module-qualified) name of any traceback
        # function inside the failing source line.
        func_str = None
        for frame in self.origin_traceback:
            searched_name = re.search(
                r'({module})*{name}'.format(
                    module=RE_PYMODULE, name=frame.name
                ),
                error_line,
            )
            if searched_name:
                func_str = searched_name.group(0)
                break

        # Nothing matched: there is no expression to inspect, so skip eval().
        if not func_str:
            return format_exception

        try:
            # NOTE(review): eval() runs on text extracted from the failing
            # source line and is resolved against this module's globals.
            # Acceptable only while that source is the user's own code.
            is_numpy_api_err = eval(
                "_is_api_in_module_helper({}, '{}')".format(func_str, "numpy")
            ) or func_str.startswith(("numpy.", "np."))
        except Exception:
            # Best effort: any failure while resolving the name means
            # "not identified as a numpy API".
            is_numpy_api_err = False

        if not is_numpy_api_err:
            return format_exception
        return [
            "TypeError: Code '{}' called numpy API {}, please use Paddle API to replace it.".format(
                error_line, func_str
            ),
            "           values will be changed to variables by dy2static, numpy api can not handle variables",
        ]

    @staticmethod
    def _paddle_traceback_lines(traceback_entries, user_code_traceback_index):
        """
        Renders every entry located after the last user-code entry (or all
        entries when there is no user-code entry), followed by an empty line.
        """
        start = (
            user_code_traceback_index[-1] + 1
            if user_code_traceback_index
            else 0
        )
        lines = [
            TraceBackFrame(
                Location(filepath, lineno), funcname, code
            ).formated_message()
            for filepath, lineno, funcname, code in traceback_entries[start:]
        ]
        lines.append("")
        return lines

    def create_message(self):
        """
        Creates a custom error message which includes trace stack with source code information of dygraph from user.

        Returns:
            str: the message, lines joined by a newline.
        """
        message_lines = []

        # Step1: Adds header message to prompt users that the following is the original information.
        header_message = "In transformed code:"
        message_lines.append(header_message)
        message_lines.append("")
        error_line = None

        # Simplify error value to improve readability if error is raised in runtime
        if self.in_runtime:
            if int(
                os.getenv(SIMPLIFY_ERROR_ENV_NAME, DEFAULT_SIMPLIFY_NEW_ERROR)
            ):
                self._simplify_error_value()
            message_lines.append(str(self.error_value))
            return '\n'.join(message_lines)

        # Step2: Optimizes stack information with source code information of dygraph from user.
        # Entries whose (filepath, lineno) is present in origin_info_map are user code.
        user_frames = []
        for i, (filepath, lineno, _, _) in enumerate(self.origin_traceback):
            dygraph_func_info = self.origin_info_map.get(
                (filepath, lineno), None
            )
            if dygraph_func_info:
                user_frames.append((i, dygraph_func_info))
        user_code_traceback_index = [i for i, _ in user_frames]

        # Add user code traceback; only the last user frame shows a code range.
        for i, dygraph_func_info in user_frames:
            if i == user_code_traceback_index[-1]:
                traceback_frame = TraceBackFrameRange(
                    dygraph_func_info.location, dygraph_func_info.function_name
                )
            else:
                traceback_frame = TraceBackFrame(
                    dygraph_func_info.location,
                    dygraph_func_info.function_name,
                    dygraph_func_info.source_code,
                )

            message_lines.append(traceback_frame.formated_message())
            error_line = traceback_frame.error_line
        message_lines.append("")

        # Add paddle traceback after user code traceback
        message_lines.extend(
            self._paddle_traceback_lines(
                self.origin_traceback, user_code_traceback_index
            )
        )

        # Step3: Adds error message like "TypeError: dtype must be int32, but received float32".
        # NOTE: `format_exception` is a list, its length is 1 in most cases, but sometimes its length
        # is greater than 1, for example, the error_type is IndentationError.
        format_exception = traceback.format_exception_only(
            self.error_type, self.error_value
        )
        if error_line is not None:
            format_exception = self.numpy_api_check(
                format_exception, error_line
            )

        message_lines.extend(
            " " * BLANK_COUNT_BEFORE_FILE_STR + line
            for line in format_exception
        )

        return '\n'.join(message_lines)

    def _create_revise_suggestion(self, bottom_error_message):
        """
        Builds the "Revise suggestion" lines for an error message.

        Args:
            bottom_error_message(list[str]): lines of the final error text.
        Returns:
            list[str]: an empty line, a title line and numbered suggestions,
            or an empty list when no keyword group matches.
        """
        revise_suggestions = [
            '',
            ' ' * BLANK_COUNT_BEFORE_FILE_STR + 'Revise suggestion: ',
        ]
        # Join once; every keyword of every group is searched in this text.
        joined_message = ''.join(bottom_error_message)
        for keywords in self.suggestion_dict.keys():
            # all keywords should be in bottom_error_message
            if not all(keyword in joined_message for keyword in keywords):
                continue
            for suggestion in self.suggestion_dict[keywords]:
                # Two header entries precede the suggestions, hence `- 1`.
                suggestion_msg = (
                    ' ' * BLANK_COUNT_BEFORE_FILE_STR * 2
                    + '{}. {}'.format(len(revise_suggestions) - 1, suggestion)
                )
                revise_suggestions.append(suggestion_msg)
        return revise_suggestions if len(revise_suggestions) > 2 else []

    def _simplify_error_value(self):
        """
        Simplifies error value to improve readability if error is raised in runtime.

        The text of `self.error_value` is cut after the line
        "outputs = static_func(*inputs)", its 'File ...' entries are re-rendered
        and `self.error_value` is replaced by a new `self.error_type` instance.
        When the expected markers are missing, `self.error_value` is left
        unchanged instead of raising.

        NOTE(liym27): The op callstack information about transformed static code has been replaced with original dygraph code.

        TODO(liym27):
            1. Need a more robust way because the code of start_trace may change.
            2. Set the switch to determine whether to simplify error_value
        """

        assert self.in_runtime is True

        error_value_lines = str(self.error_value).split("\n")
        error_value_lines_strip = [mes.lstrip(" ") for mes in error_value_lines]

        start_trace = "outputs = static_func(*inputs)"
        try:
            start_idx = error_value_lines_strip.index(start_trace)
        except ValueError:
            # Marker not present: keep the original error value rather than
            # hiding the real error behind a ValueError raised from here.
            return

        error_value_lines = error_value_lines[start_idx + 1 :]
        error_value_lines_strip = error_value_lines_strip[start_idx + 1 :]

        # use empty line to locate the bottom_error_message
        try:
            empty_line_idx = error_value_lines_strip.index('')
        except ValueError:
            # No separator line: the layout is not the expected one.
            return
        bottom_error_message = error_value_lines[empty_line_idx + 1 :]
        revise_suggestion = self._create_revise_suggestion(bottom_error_message)

        error_traceback = []
        user_code_traceback_index = []
        pattern = 'File "(?P<filepath>.+)", line (?P<lineno>.+), in (?P<function_name>.+)'

        # Distinguish user code and framework code: a frame is user code when
        # its location equals the `location` of some origin_info_map value.
        user_code_locations = {
            (info.location.filepath, info.location.lineno)
            for info in self.origin_info_map.values()
        }

        # Entries are expected in pairs: a 'File ...' line followed by its code.
        for i in range(0, len(error_value_lines_strip), 2):
            if not error_value_lines_strip[i].startswith("File "):
                continue
            re_result = re.search(pattern, error_value_lines_strip[i])
            if re_result is None:
                # Starts with 'File ' but is not a traceback entry.
                continue
            tmp_filepath, lineno_str, function_name = re_result.groups()
            try:
                lineno = int(lineno_str)
            except ValueError:
                # The pattern accepts any text as line number; skip non-numeric.
                continue
            code = (
                error_value_lines_strip[i + 1]
                if i + 1 < len(error_value_lines_strip)
                else ''
            )

            if (tmp_filepath, lineno) in user_code_locations:
                user_code_traceback_index.append(len(error_traceback))

            error_traceback.append((tmp_filepath, lineno, function_name, code))

        error_frame = []
        # Add user code traceback; only the last user frame shows a code range.
        for i in user_code_traceback_index:
            filepath, lineno, funcname, code = error_traceback[i]
            if i == user_code_traceback_index[-1]:
                traceback_frame = TraceBackFrameRange(
                    Location(filepath, lineno), funcname
                )
            else:
                traceback_frame = TraceBackFrame(
                    Location(filepath, lineno), funcname, code
                )
            error_frame.append(traceback_frame.formated_message())
        error_frame.append("")

        # Add paddle traceback after user code traceback
        error_frame.extend(
            self._paddle_traceback_lines(
                error_traceback, user_code_traceback_index
            )
        )

        error_frame.extend(bottom_error_message)
        error_frame.extend(revise_suggestion)
        error_value_str = '\n'.join(error_frame)
        self.error_value = self.error_type(error_value_str)

    def raise_new_exception(self):
        """
        Raises the exception built by `create_exception()`.

        When the environment variable named by DISABLE_ERROR_ENV_NAME is set
        to a non-zero integer, the exception currently being handled is
        re-raised instead (bare `raise`), so this method has to be called
        while an exception is being handled in that case.
        """
        # Raises the origin error if disable dygraph2static error module,
        if int(os.getenv(DISABLE_ERROR_ENV_NAME, DEFAULT_DISABLE_NEW_ERROR)):
            raise

        new_exception = self.create_exception()
        # NOTE(liym27):
        # Why `raise new_exception from None`?
        #
        # In Python 3, by default, a new exception is raised with trace information of the caught exception.
        # This only raises new_exception and hides unwanted implementation details from tracebacks of the
        # caught exception.

        raise new_exception from None