error.py 15.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
import os
16 17
import sys
import traceback
18
import linecache
19
import re
20
import numpy as np
21 22

from paddle.fluid.dygraph.dygraph_to_static.origin_info import Location, OriginInfo, global_origin_info_map
23
from paddle.fluid.dygraph.dygraph_to_static.utils import _is_api_in_module_helper, RE_PYMODULE
24 25 26

# Name of the attribute under which an `ErrorData` instance is attached to an
# exception (see `attach_error_data` and `ErrorData.create_exception`).
ERROR_DATA = "Error data about original source code information and traceback."

# A flag to set whether to simplify the error value of an error raised in
# runtime (read in `ErrorData.create_message`).
SIMPLIFY_ERROR_ENV_NAME = "TRANSLATOR_SIMPLIFY_NEW_ERROR"
DEFAULT_SIMPLIFY_NEW_ERROR = 1

# A flag to set whether to disable the dygraph2static error reporting module
# and re-raise the origin error instead (read in `ErrorData.raise_new_exception`).
DISABLE_ERROR_ENV_NAME = "TRANSLATOR_DISABLE_NEW_ERROR"
DEFAULT_DISABLE_NEW_ERROR = 0

# Number of source lines collected around the error line by `TraceBackFrameRange`.
SOURCE_CODE_RANGE = 5
# Number of blanks put before each 'File "...", line ...' entry of a message.
BLANK_COUNT_BEFORE_FILE_STR = 4

38

39
def attach_error_data(error, in_runtime=False):
    """
    Attaches error data about original source code information and traceback to an error.

    The data is built from the exception currently being handled, as reported
    by `sys.exc_info()`, and stored on `error` under the `ERROR_DATA` attribute.

    Args:
        error(Exception): A native error.
        in_runtime(bool): `error` is raised in runtime if in_runtime is True, otherwise in compile time
    Returns:
        The same `error` object, with the error data attached.
    """
    exc_type, exc_value, exc_tb = sys.exc_info()

    # The first extracted frame is skipped; only the remaining ones are kept.
    frames = traceback.extract_tb(exc_tb)[1:]

    data = ErrorData(exc_type, exc_value, frames, global_origin_info_map)
    data.in_runtime = in_runtime

    # ERROR_DATA is not a valid identifier, so the attribute has to be set
    # through setattr.
    setattr(error, ERROR_DATA, data)
    return error


class TraceBackFrame(OriginInfo):
    """
    Traceback frame information shown with a single line of source code.
    """

    def __init__(self, location, function_name, source_code):
        self.function_name = function_name
        self.source_code = source_code
        self.location = location
        # Always empty for this frame type; `ErrorData.create_message` reads
        # this attribute from every frame it formats.
        self.error_line = ''

    def formated_message(self):
        """
        Returns the frame as 'File "<path>", line <n>, in <func>' followed by
        the source code on the next line.
        """
        code = self.source_code
        # self.source_code may be empty in some functions (for example, a
        # decorator generated function); anything that is not a string is
        # formatted as it is.
        if isinstance(code, str):
            code = code.lstrip()

        prefix = ' ' * BLANK_COUNT_BEFORE_FILE_STR
        frame_text = 'File "{}", line {}, in {}\n\t{}'.format(
            self.location.filepath, self.location.lineno, self.function_name,
            code)
        return prefix + frame_text
79

80

81 82 83 84 85 86 87 88 89
class TraceBackFrameRange(OriginInfo):
    """
    Traceback frame information shown with a range of source code lines.

    SOURCE_CODE_RANGE lines are read from `location.filepath` through
    `linecache`, starting SOURCE_CODE_RANGE // 2 lines before
    `location.lineno` (but never before line 1). A '~~~ <--- HERE' marker line
    is inserted right after the error line.

    Args:
        location: Object providing the `filepath` and `lineno` of the error line.
        function_name(str): Name of the function the error line belongs to.
    """

    def __init__(self, location, function_name):
        self.location = location
        self.function_name = function_name
        self.source_code = []
        self.error_line = ''
        # Leading blank count of every entry of self.source_code; -1 marks an
        # empty source line.
        blank_count = []
        begin_lineno = max(1, self.location.lineno - SOURCE_CODE_RANGE // 2)

        for i in range(begin_lineno, begin_lineno + SOURCE_CODE_RANGE):
            # linecache.getline returns '' when the line can not be read.
            line = linecache.getline(self.location.filepath, i).rstrip('\n')
            line_lstrip = line.lstrip()
            self.source_code.append(line_lstrip)
            if not line_lstrip:  # empty line from source code
                blank_count.append(-1)
            else:
                blank_count.append(len(line) - len(line_lstrip))

            if i == self.location.lineno:
                self.error_line = line_lstrip
                hint_msg = '~' * len(line_lstrip) + ' <--- HERE'
                self.source_code.append(hint_msg)
                # The marker is indented like the line it points at.
                blank_count.append(blank_count[-1])
        linecache.clearcache()

        # remove top and bottom empty line in source code
        while self.source_code and not self.source_code[0]:
            self.source_code.pop(0)
            blank_count.pop(0)
        while self.source_code and not self.source_code[-1]:
            self.source_code.pop(-1)
            blank_count.pop(-1)

        # `default=0` keeps this from raising ValueError when no real source
        # line was read (for example the file is not available to linecache),
        # in which case only the marker line, or nothing at all, is left.
        min_blank_count = min((count for count in blank_count if count >= 0),
                              default=0)
        for i, code_line in enumerate(self.source_code):
            # if source_code[i] is empty line between two code line, dont add blank
            if code_line:
                # A marker that follows an empty error line carries -1; clamp
                # it so that it is aligned with the least indented line.
                relative_indent = max(blank_count[i] - min_blank_count, 0)
                self.source_code[i] = ' ' * (
                    relative_indent + BLANK_COUNT_BEFORE_FILE_STR * 2
                ) + code_line

    def formated_message(self):
        """
        Returns the frame as 'File "<path>", line <n>, in <func>' followed by
        the collected source code lines, one per line.
        """
        msg = ' ' * BLANK_COUNT_BEFORE_FILE_STR + 'File "{}", line {}, in {}\n'.format(
            self.location.filepath, self.location.lineno, self.function_name)
        return msg + '\n'.join(self.source_code)
130 131


132
class SuggestionDict(object):
    """
    Maps tuples of keywords to tuples of revise suggestions.

    `ErrorData._create_revise_suggestion` lists the suggestions of an entry
    when every keyword of its key is found in the error message.
    """

    def __init__(self):
        not_initialized_keywords = ('is not initialized.', 'Hint:',
                                    'IsInitialized')
        not_initialized_suggestions = (
            "Please ensure all your sublayers are inheritted from nn.Layer.",
            "Please ensure there is no tensor created explicitly depended on external data, we suggest to register it as buffer tensor. See https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/04_dygraph_to_static/export_model/principle_cn.html#parameters-buffers for details"
        )

        # {(keywords): (suggestions)}
        self.suggestion_dict = {
            not_initialized_keywords: not_initialized_suggestions,
        }

    def keys(self):
        """Returns the keyword tuples that have suggestions."""
        return self.suggestion_dict.keys()

    def __getitem__(self, key):
        """Returns the suggestions registered for the keyword tuple `key`."""
        return self.suggestion_dict[key]


150 151 152 153
class Dy2StKeyError(Exception):
    """
    Exception raised by `ErrorData.create_exception` in place of a KeyError.

    NOTE(review): presumably used because str() of a KeyError shows the repr
    of its message, which would garble a multi-line message - confirm.
    """


154 155 156 157 158 159 160 161 162 163 164
class ErrorData(object):
    """
    Error data attached to an exception which is raised in un-transformed code.

    It keeps the type and value of the origin error, its extracted traceback
    and the origin info map, and builds a new exception whose message shows
    the traceback with source code information.

    Args:
        error_type(type): Type of the origin error.
        error_value(Exception): The origin error.
        origin_traceback(list): Frames of the origin traceback, each one
            unpackable as (filepath, lineno, funcname, code).
        origin_info_map(dict): Maps (filepath, lineno) to an origin info
            providing `location`, `function_name` and `source_code`.
    """

    def __init__(self, error_type, error_value, origin_traceback,
                 origin_info_map):
        self.error_type = error_type
        self.error_value = error_value
        self.origin_traceback = origin_traceback
        self.origin_info_map = origin_info_map
        self.in_runtime = False
        self.suggestion_dict = SuggestionDict()

    def create_exception(self):
        """
        Creates a new exception carrying the custom error message.

        Returns:
            An exception of `self.error_type` (Dy2StKeyError if the type is
            KeyError) with this ErrorData attached under `ERROR_DATA`.
        """
        message = self.create_message()
        if self.error_type is KeyError:
            new_exception = Dy2StKeyError(message)
        else:
            new_exception = self.error_type(message)
        setattr(new_exception, ERROR_DATA, self)
        return new_exception

    def numpy_api_check(self, format_exception, error_line):
        """
        Replaces the message of a TypeError which comes from a numpy API call.

        Args:
            format_exception(list): Lines of the formatted origin exception.
            error_line(str): Source code of the line that raised the error.
        Returns:
            A list with a hint to use Paddle API if `error_line` calls a numpy
            API, otherwise `format_exception` unchanged.
        """
        if self.error_type is not TypeError:
            return format_exception

        # Look for the (possibly module qualified) name of one of the frames
        # of the traceback in the source code of the error line.
        func_str = None
        for frame in self.origin_traceback:
            searched_name = re.search(
                r'({module})*{name}'.format(module=RE_PYMODULE,
                                            name=re.escape(frame.name)),
                error_line)
            if searched_name:
                func_str = searched_name.group(0)
                break
        if not func_str:
            return format_exception

        try:
            # NOTE(review): `eval` evaluates text taken from the user's source
            # line in order to resolve it to an object. It is kept as it is;
            # any failure to evaluate is treated as "not a numpy API".
            module_result = eval("_is_api_in_module_helper({}, '{}')".format(
                func_str, "numpy"))
            is_numpy_api_err = module_result or func_str.startswith(
                ("numpy.", "np."))
        except Exception:
            is_numpy_api_err = False

        if not is_numpy_api_err:
            return format_exception
        return [
            "TypeError: Code '{}' called numpy API {}, please use Paddle API to replace it."
            .format(error_line, func_str),
            "           values will be changed to variables by dy2static, numpy api can not handle variables"
        ]

    def create_message(self):
        """
        Creates a custom error message which includes trace stack with source code information of dygraph from user.
        """
        # Step1: Adds header message to prompt users that the following is the original information.
        message_lines = ["In transformed code:", ""]
        error_line = None

        # Simplify error value to improve readability if error is raised in runtime
        if self.in_runtime:
            if int(
                    os.getenv(SIMPLIFY_ERROR_ENV_NAME,
                              DEFAULT_SIMPLIFY_NEW_ERROR)):
                self._simplify_error_value()
            message_lines.append(str(self.error_value))
            return '\n'.join(message_lines)

        # Step2: Optimizes stack information with source code information of dygraph from user.
        # (index in origin_traceback, origin info) of every frame of user code
        user_code_frames = []
        for i, (filepath, lineno, funcname,
                code) in enumerate(self.origin_traceback):
            dygraph_func_info = self.origin_info_map.get((filepath, lineno),
                                                         None)
            if dygraph_func_info:
                user_code_frames.append((i, dygraph_func_info))
        last_user_code_index = user_code_frames[-1][
            0] if user_code_frames else None

        # Add user code traceback; only the last frame is shown with a range
        # of source code.
        for i, dygraph_func_info in user_code_frames:
            if i == last_user_code_index:
                traceback_frame = TraceBackFrameRange(
                    dygraph_func_info.location, dygraph_func_info.function_name)
            else:
                traceback_frame = TraceBackFrame(
                    dygraph_func_info.location, dygraph_func_info.function_name,
                    dygraph_func_info.source_code)

            message_lines.append(traceback_frame.formated_message())
            error_line = traceback_frame.error_line
        message_lines.append("")

        # Add paddle traceback after user code traceback
        paddle_traceback_start_index = last_user_code_index + 1 if user_code_frames else 0
        for filepath, lineno, funcname, code in self.origin_traceback[
                paddle_traceback_start_index:]:
            traceback_frame = TraceBackFrame(Location(filepath, lineno),
                                             funcname, code)
            message_lines.append(traceback_frame.formated_message())
        message_lines.append("")

        # Step3: Adds error message like "TypeError: dtype must be int32, but received float32".
        # NOTE: `format_exception` is a list, its length is 1 in most cases, but sometimes its length
        # is greater than 1, for example, the error_type is IndentationError.
        format_exception = traceback.format_exception_only(
            self.error_type, self.error_value)
        if error_line is not None:
            format_exception = self.numpy_api_check(format_exception,
                                                    error_line)

        message_lines.extend(" " * BLANK_COUNT_BEFORE_FILE_STR + line
                             for line in format_exception)

        return '\n'.join(message_lines)

    def _create_revise_suggestion(self, bottom_error_message):
        """
        Creates the revise suggestions that match the bottom error message.

        Args:
            bottom_error_message(list): Lines of the bottom error message.
        Returns:
            A list of message lines (an empty line, a title and the numbered
            suggestions), or an empty list if no suggestion matches.
        """
        revise_suggestions = [
            '', ' ' * BLANK_COUNT_BEFORE_FILE_STR + 'Revise suggestion: '
        ]
        bottom_text = ''.join(bottom_error_message)
        for keywords in self.suggestion_dict.keys():
            # all keywords should be in bottom_error_message
            if all(keyword in bottom_text for keyword in keywords):
                for suggestion in self.suggestion_dict[keywords]:
                    suggestion_msg = ' ' * BLANK_COUNT_BEFORE_FILE_STR * 2 + '{}. {}'.format(
                        str(len(revise_suggestions) - 1), suggestion)
                    revise_suggestions.append(suggestion_msg)
        return revise_suggestions if len(revise_suggestions) > 2 else []

    def _simplify_error_value(self):
        """
        Simplifies error value to improve readability if error is raised in runtime.

        The text of `self.error_value` is expected to contain the line
        `outputs = static_func(*inputs)`, followed by pairs of
        'File "...", line ..., in ...' and source code lines, an empty line
        and the bottom error message. If the start line or the empty line is
        missing, `self.error_value` is left unchanged instead of raising, so
        that the origin error is never hidden by a failure to simplify it.

        NOTE(liym27): The op callstack information about transformed static code has been replaced with original dygraph code.

        TODO(liym27):
            1. Need a more robust way because the code of start_trace may change.
            2. Set the switch to determine whether to simplify error_value
        """
        assert self.in_runtime is True

        error_value_lines = str(self.error_value).split("\n")
        error_value_lines_strip = [mes.lstrip(" ") for mes in error_value_lines]

        start_trace = "outputs = static_func(*inputs)"
        try:
            start_idx = error_value_lines_strip.index(start_trace)
        except ValueError:
            # Unexpected layout: keep the error value as it is.
            return

        error_value_lines = error_value_lines[start_idx + 1:]
        error_value_lines_strip = error_value_lines_strip[start_idx + 1:]

        # use empty line to locate the bottom_error_message
        try:
            empty_line_idx = error_value_lines_strip.index('')
        except ValueError:
            return
        bottom_error_message = error_value_lines[empty_line_idx + 1:]
        revise_suggestion = self._create_revise_suggestion(bottom_error_message)

        error_traceback = []
        user_code_traceback_index = []
        pattern = re.compile(
            r'File "(?P<filepath>.+)", line (?P<lineno>\d+), in (?P<function_name>.+)'
        )

        # Distinguish user code and framework code using the locations of the
        # origin info map.
        user_code_locations = set(
            (info.location.filepath, info.location.lineno)
            for info in self.origin_info_map.values())

        # Frames are expected in pairs of lines: the 'File ...' line and then
        # the source code line.
        for i in range(0, len(error_value_lines_strip), 2):
            frame_line = error_value_lines_strip[i]
            if not frame_line.startswith("File "):
                continue
            re_result = pattern.search(frame_line)
            if re_result is None:
                # Not a well formed frame line: skip it rather than crash.
                continue
            tmp_filepath, lineno_str, function_name = re_result.groups()
            lineno = int(lineno_str)
            code = error_value_lines_strip[
                i + 1] if i + 1 < len(error_value_lines_strip) else ''

            if (tmp_filepath, lineno) in user_code_locations:
                user_code_traceback_index.append(len(error_traceback))

            error_traceback.append((tmp_filepath, lineno, function_name, code))

        error_frame = []
        # Add user code traceback
        for i in user_code_traceback_index:
            filepath, lineno, funcname, code = error_traceback[i]
            if i == user_code_traceback_index[-1]:
                traceback_frame = TraceBackFrameRange(
                    Location(filepath, lineno), funcname)
            else:
                traceback_frame = TraceBackFrame(Location(filepath, lineno),
                                                 funcname, code)
            error_frame.append(traceback_frame.formated_message())
        error_frame.append("")

        # Add paddle traceback after user code traceback
        paddle_traceback_start_index = user_code_traceback_index[
            -1] + 1 if user_code_traceback_index else 0
        for filepath, lineno, funcname, code in error_traceback[
                paddle_traceback_start_index:]:
            traceback_frame = TraceBackFrame(Location(filepath, lineno),
                                             funcname, code)
            error_frame.append(traceback_frame.formated_message())
        error_frame.append("")

        error_frame.extend(bottom_error_message)
        error_frame.extend(revise_suggestion)
        error_value_str = '\n'.join(error_frame)
        self.error_value = self.error_type(error_value_str)

    def raise_new_exception(self):
        """
        Raises the new exception created by `create_exception`.

        If the environment variable named by DISABLE_ERROR_ENV_NAME is set to
        a non-zero integer, the exception currently being handled is
        re-raised instead, so this method has to be called while an exception
        is being handled.
        """
        # Raises the origin error if disable dygraph2static error module,
        if int(os.getenv(DISABLE_ERROR_ENV_NAME, DEFAULT_DISABLE_NEW_ERROR)):
            raise

        new_exception = self.create_exception()
        # NOTE(liym27):
        # Why `raise new_exception from None`?
        #
        # In Python 3, by default, an new exception is raised with trace information of the caught exception.
        # This only raises new_exception and hides unwanted implementation details from tracebacks of the
        # caught exception.

        raise new_exception from None