core.py 14.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
import site
16 17
import sys
import os
18 19
import warnings
import platform
20

21 22 23 24
# Base name and platform-dependent suffix of the compiled core library.
dy_lib_name = 'libpaddle'
dy_lib_suffix = 'pyd' if os.name == 'nt' else 'so'

# Directory that contains this file; the core library is looked up beside it.
current_path = os.path.abspath(os.path.dirname(__file__))

# True when the compiled core library file exists next to this module.
has_paddle_dy_lib = os.path.exists(
    f"{current_path}{os.sep}{dy_lib_name}.{dy_lib_suffix}"
)
31

32 33
# On Windows, expose the bundled third-party libraries to the DLL loader
# before the core extension module is imported.
try:
    if os.name == 'nt':
        third_lib_path = os.sep.join([current_path, '..', 'libs'])
        # Will load shared library from 'path' on windows
        os.environ['path'] = ';'.join(
            [current_path, third_lib_path, os.environ['path']]
        )
        sys.path.insert(0, third_lib_path)
        # Note: from python3.8, PATH will not take effect
        # https://github.com/python/cpython/pull/12302
        # Use add_dll_directory to specify dll resolution path
        if sys.version_info[:2] >= (3, 8):
            os.add_dll_directory(third_lib_path)

except ImportError as e:
    # Re-raise with a platform-specific hint prepended to the original error.
    if os.name != 'nt':
        raise ImportError(
            """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
        if you encounters \"libmkldnn.so not found\" errors. If you have python
        installed in other directory, replace \"/usr/local/lib\" with your own
        directory. The original error is: \n"""
            + str(e)
        )
    executable_path = os.path.abspath(os.path.dirname(sys.executable))
    raise ImportError(
        """NOTE: You may need to run \"set PATH=%s;%%PATH%%\"
        if you encounters \"DLL load failed\" errors. If you have python
        installed in other directory, replace \"%s\" with your own
        directory. The original error is: \n %s"""
        % (executable_path, executable_path, str(e))
    )
except Exception as e:
    raise e

T
tensor-tang 已提交
67

T
tensor-tang 已提交
68 69 70 71 72 73 74 75
def avx_supported():
    """
    Whether current system(Linux, MacOS, Windows) is supported with AVX.

    Returns:
        bool: True when an AVX flag is detected. Detection failures are
        reported on stderr and yield False; any platform other than Linux,
        macOS or Windows yields False.
    """
    sysstr = platform.system().lower()
    has_avx = False
    if sysstr == 'linux':
        try:
            # The context manager closes the pipe even when read() fails.
            with os.popen('cat /proc/cpuinfo | grep -i avx') as pipe:
                has_avx = pipe.read() != ''
        except Exception as e:
            sys.stderr.write(
                'Can not get the AVX flag from /proc/cpuinfo.\n'
                'The original error is: %s\n' % str(e)
            )
        return has_avx
    elif sysstr == 'darwin':
        try:
            with os.popen(
                'sysctl machdep.cpu.features | grep -i avx'
            ) as pipe:
                has_avx = pipe.read() != ''
        except Exception as e:
            sys.stderr.write(
                'Can not get the AVX flag from machdep.cpu.features.\n'
                'The original error is: %s\n' % str(e)
            )
        if not has_avx:
            # Fall back to the leaf7 feature list; grep exits with 0 only
            # when a matching line was found.
            import subprocess

            pipe = subprocess.Popen(
                'sysctl machdep.cpu.leaf7_features | grep -i avx',
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
            _ = pipe.communicate()
            has_avx = pipe.returncode == 0
        return has_avx
    elif sysstr == 'windows':
        import ctypes

        ONE_PAGE = ctypes.c_size_t(0x1000)
        # dwFreeType value that releases a whole VirtualAlloc region; the
        # size argument passed with it must be 0.
        MEM_RELEASE = ctypes.c_ulong(0x8000)

        def asm_func(code_str, restype=ctypes.c_uint32, argtypes=()):
            # Call the code_str as a function
            # Alloc 1 page to ensure the protection
            pfnVirtualAlloc = ctypes.windll.kernel32.VirtualAlloc
            pfnVirtualAlloc.restype = ctypes.c_void_p
            MEM_COMMIT = ctypes.c_ulong(0x1000)
            PAGE_READWRITE = ctypes.c_ulong(0x4)
            address = pfnVirtualAlloc(
                None, ONE_PAGE, MEM_COMMIT, PAGE_READWRITE
            )
            if not address:
                raise Exception("Failed to VirtualAlloc")

            # Copy the code into the memory segment
            memmove = ctypes.CFUNCTYPE(
                ctypes.c_void_p,
                ctypes.c_void_p,
                ctypes.c_void_p,
                ctypes.c_size_t,
            )(ctypes._memmove_addr)
            if memmove(address, code_str, len(code_str)) < 0:
                raise Exception("Failed to memmove")

            # Enable execute permissions
            PAGE_EXECUTE = ctypes.c_ulong(0x10)
            pfnVirtualProtect = ctypes.windll.kernel32.VirtualProtect
            res = pfnVirtualProtect(
                ctypes.c_void_p(address),
                ONE_PAGE,
                PAGE_EXECUTE,
                ctypes.byref(ctypes.c_ulong(0)),
            )
            if not res:
                raise Exception("Failed VirtualProtect")

            # Flush instruction cache
            pfnGetCurrentProcess = ctypes.windll.kernel32.GetCurrentProcess
            pfnGetCurrentProcess.restype = ctypes.c_void_p
            prochandle = ctypes.c_void_p(pfnGetCurrentProcess())
            res = ctypes.windll.kernel32.FlushInstructionCache(
                prochandle, ctypes.c_void_p(address), ONE_PAGE
            )
            if not res:
                raise Exception("Failed FlushInstructionCache")

            # Cast the memory to function
            functype = ctypes.CFUNCTYPE(restype, *argtypes)
            func = functype(address)
            return func, address

        # http://en.wikipedia.org/wiki/CPUID#EAX.3D1:_Processor_Info_and_Feature_Bits
        # mov eax,0x1; cpuid; mov eax,ecx; ret
        code_str = b"\xB8\x01\x00\x00\x00\x0f\xa2\x89\xC8\xC3"
        avx_bit = 28
        retval = 0
        try:
            # Convert the code_str into a function that returns uint
            func, address = asm_func(code_str)
            try:
                retval = func()
            finally:
                # Release the page even when the call itself fails.
                ctypes.windll.kernel32.VirtualFree(
                    ctypes.c_void_p(address), ctypes.c_size_t(0), MEM_RELEASE
                )
        except Exception as e:
            sys.stderr.write(
                'Failed getting the AVX flag on Windows.\n'
                'The original error is: %s\n' % str(e)
            )
        return (retval & (1 << avx_bit)) > 0
    else:
        sys.stderr.write('Do not get AVX flag on %s\n' % sysstr)
        return False


185 186
def run_shell_command(cmd):
    """Run ``cmd`` through the shell and return its stripped stdout text.

    Returns None whenever the command wrote anything to stderr; otherwise
    the stdout bytes decoded as UTF-8 with surrounding whitespace removed.
    """
    import subprocess

    process = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    stdout_bytes, stderr_bytes = process.communicate()
    if stderr_bytes:
        return None
    return stdout_bytes.decode('utf-8').strip()
195 196 197 198


def get_dso_path(core_so, dso_name):
    """Look up the path of a shared library that ``core_so`` links against.

    Runs ``ldd`` on ``core_so``, keeps the lines matching ``dso_name`` and
    returns the third whitespace-separated field of them.

    Args:
        core_so: Path of the library to inspect.
        dso_name: Pattern handed to ``grep`` to select the dependency.

    Returns:
        Whatever ``run_shell_command`` returns for that pipeline, or None
        when either argument is empty or None.
    """
    if not (core_so and dso_name):
        return None

    import shlex

    # Quote both values so paths containing spaces or shell metacharacters
    # reach ldd/grep as single, literal arguments.
    return run_shell_command(
        "ldd %s|grep %s|awk '{print $3}'"
        % (shlex.quote(core_so), shlex.quote(dso_name))
    )


def load_dso(dso_absolute_path):
    """Best-effort load of the shared library at ``dso_absolute_path``.

    Does nothing when the path is empty or None. A failed load is reported
    with ``warnings.warn`` instead of being raised.
    """
    if dso_absolute_path:
        try:
            from ctypes import cdll

            cdll.LoadLibrary(dso_absolute_path)
        except Exception:
            # Stay best-effort, but let KeyboardInterrupt/SystemExit through.
            warnings.warn("Load {} failed".format(dso_absolute_path))


def pre_load(dso_name):
    """Resolve ``dso_name`` against the core library and load it.

    The core library path is only built when the library file was found
    next to this module; otherwise None is passed on to ``get_dso_path``.
    """
    core_so = (
        current_path + os.sep + dy_lib_name + '.' + dy_lib_suffix
        if has_paddle_dy_lib
        else None
    )
    load_dso(get_dso_path(core_so, dso_name))


chen.zhiyu's avatar
chen.zhiyu 已提交
225 226 227 228 229 230 231 232 233
def get_libc_ver():
    """Return ``(libc_name, version)`` as reported by ``ldd``.

    The glibc probe is tried first, then the musl probe; the first probe
    whose command result is not None wins. ``(None, None)`` is returned
    when both probes yield None.
    """
    probes = (
        ("glibc", "ldd --version | awk '/ldd/{print $NF}'"),
        ("musl", "ldd 2>&1 | awk '/Version/{print $NF}'"),
    )
    for libc_name, probe_cmd in probes:
        version = run_shell_command(probe_cmd)
        if version is not None:
            return (libc_name, version)
    return (None, None)
234 235 236


def less_than_ver(a, b):
    """Return True when dotted version string ``a`` is lower than ``b``.

    Trailing ``.0`` groups are ignored, so ``"2.23.0"`` equals ``"2.23"``,
    and components are compared numerically (``"2.3"`` < ``"2.23"``).

    Returns False when either argument is None or cannot be parsed as
    dot-separated integers, since no ordering can be established then.
    """
    if a is None or b is None:
        return False

    import re

    def to_list(s):
        s = re.sub(r'(\.0+)+$', '', s)
        return [int(x) for x in s.split('.')]

    try:
        return to_list(a) < to_list(b)
    except ValueError:
        # Empty or non-numeric components (e.g. "" or "2.35-0ubuntu3").
        return False


250
# NOTE(zhiqiu): An error may occur when importing paddle on Linux with an old glibc
# (the check below uses < 2.23); its message is
# "dlopen: cannot load any more object with static TLS".
# This happens when:
# (1) more than 14 dynamic shared libraries (DSOs) have already been loaded,
# (2) after that, a dynamic shared library (DSO) with static TLS is loaded.
# For paddle, the problem is that 'libgomp' is a DSO with static TLS, and it is loaded after 14 DSOs.
# So, as a workaround, 'libgomp' is preloaded here before 'libpaddle.so'.
# The final solution is to upgrade glibc to > 2.22 on the target system.
if platform.system().lower() == 'linux':
    libc_type, libc_ver = get_libc_ver()
    if libc_type == 'glibc' and less_than_ver(libc_ver, '2.23'):
        try:
            pre_load('libgomp')
        except Exception:
            # NOTE(zhiqiu): do not abort on failure, since importing libpaddle.so may still succeed
            sys.stderr.write('Error: Can not preload libgomp.so\n')
266

267 268
# Import the compiled core and re-export its public and selected private names.
try:
    from . import libpaddle

    if avx_supported() and not libpaddle.is_compiled_with_avx():
        sys.stderr.write(
            "Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
            "Hence, no-avx core with worse preformance will be imported.\nIf you like, you could "
            "reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
            "to get better performance.\n"
        )

    # assign tensor alias
    libpaddle.LoDTensor = libpaddle.Tensor

    from .libpaddle import *
    from .libpaddle import __doc__, __file__, __name__, __package__
    from .libpaddle import __unittest_throw_exception__
    from .libpaddle import _append_python_callable_object_and_return_id
    from .libpaddle import _cleanup, _Scope
    from .libpaddle import _get_use_default_grad_op_desc_maker_ops
    from .libpaddle import _get_all_register_op_kernels
    from .libpaddle import _is_program_version_supported
    from .libpaddle import _set_eager_deletion_mode
    from .libpaddle import _get_eager_deletion_vars
    from .libpaddle import _set_fuse_parameter_group_size
    from .libpaddle import _set_fuse_parameter_memory_size
    from .libpaddle import _is_dygraph_debug_enabled
    from .libpaddle import _dygraph_debug_level
    from .libpaddle import _switch_tracer
    from .libpaddle import _set_paddle_lib_path
    from .libpaddle import _create_loaded_parameter
    from .libpaddle import _cuda_synchronize
    from .libpaddle import _is_compiled_with_heterps
    from .libpaddle import _promote_types_if_complex_exists
    from .libpaddle import _set_cached_executor_build_strategy
    from .libpaddle import _device_synchronize
    from .libpaddle import _xpu_device_synchronize
    from .libpaddle import _get_current_stream
    from .libpaddle import _Profiler, _ProfilerResult, _RecordEvent
    from .libpaddle import _set_current_stream
    from .libpaddle import _get_phi_kernel_name
    from .libpaddle import set_prim_enabled
    from .libpaddle import is_prim_enabled

    # These helpers are only imported on non-Windows platforms.
    if sys.platform != 'win32':
        from .libpaddle import _set_process_pids
        from .libpaddle import _erase_process_pids
        from .libpaddle import _set_process_signal_handler
        from .libpaddle import _throw_error_if_process_failed
        from .libpaddle import _convert_to_tensor_list
        from .libpaddle import _array_to_share_memory_tensor
        from .libpaddle import _cleanup_mmap_fds
        from .libpaddle import _remove_tensor_list_mmap_fds
except Exception:
    if has_paddle_dy_lib:
        sys.stderr.write(
            'Error: Can not import paddle core while this file exists: '
            + current_path
            + os.sep
            + 'libpaddle.'
            + dy_lib_suffix
            + '\n'
        )
    # `libpaddle` is unbound when the very first import above failed; looking
    # it up defensively keeps a NameError from masking the original error.
    _loaded_core = globals().get('libpaddle')
    if (
        _loaded_core is not None
        and not avx_supported()
        and _loaded_core.is_compiled_with_avx()
    ):
        sys.stderr.write(
            "Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
            "you should reinstall paddlepaddle with no-avx core.\n"
        )
    # Bare raise re-raises the active exception with its traceback intact.
    raise
336 337


338 339 340 341 342 343 344 345 346 347 348
def set_paddle_custom_device_lib_path(lib_path):
    """Give the CUSTOM_DEVICE_ROOT environment variable a default value.

    An already-set variable is left untouched. Otherwise it becomes the
    normalized ``lib_path`` when that path exists, or an empty string.
    """
    if 'CUSTOM_DEVICE_ROOT' in os.environ:
        # Keep the value that is already set in the environment.
        return
    os.environ['CUSTOM_DEVICE_ROOT'] = (
        os.path.normpath(lib_path) if os.path.exists(lib_path) else ''
    )


349 350
# set paddle lib path
def set_paddle_lib_path():
    """Find the installed ``paddle/libs`` directory and register it.

    Each site-packages directory is checked in order, followed by the user
    site directory. For the first one containing ``paddle/libs``, that
    directory is passed to ``_set_paddle_lib_path`` and the sibling
    ``paddle-plugins`` directory is offered as the custom device root.
    Nothing happens when no candidate contains ``paddle/libs``.
    """
    site_dirs = (
        site.getsitepackages()
        if hasattr(site, 'getsitepackages')
        else [x for x in sys.path if 'site-packages' in x]
    )
    candidates = list(site_dirs)
    # site.USER_SITE may be missing or None (e.g. the interpreter was started
    # without site initialization); joining None into a path would raise.
    user_site = getattr(site, 'USER_SITE', None)
    if user_site:
        candidates.append(user_site)

    for site_dir in candidates:
        lib_dir = os.path.sep.join([site_dir, 'paddle', 'libs'])
        if os.path.exists(lib_dir):
            _set_paddle_lib_path(lib_dir)
            set_paddle_custom_device_lib_path(
                os.path.sep.join([lib_dir, '..', '..', 'paddle-plugins'])
            )
            return
371 372 373


# Run the library path lookup once, when this module is imported.
set_paddle_lib_path()
374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407


def set_prim_forward(value):
    """Store ``value`` in the FLAGS_prim_forward environment variable.

    ``str(value)`` must equal "true", "false" or "debug" ignoring case,
    otherwise TypeError is raised. The text is stored with its original case.
    """
    text = str(value)
    allowed = ("true", "false", "debug")
    if text.lower() not in allowed:
        raise TypeError(f"flag {text} should be string of bool or 'debug'.")
    os.environ["FLAGS_prim_forward"] = text


def enable_prim_forward():
    """Read FLAGS_prim_forward from the environment (default "true").

    Returns the string "debug" for "debug", False for "false" and True for
    any other value; the comparison ignores case.
    """
    setting = os.getenv("FLAGS_prim_forward", "true").lower()
    if setting == "debug":
        return "debug"
    return setting != "false"


def set_prim_backward(value):
    """Store ``value`` in the FLAGS_prim_backward environment variable.

    ``str(value)`` must equal "true" or "false" ignoring case, otherwise
    TypeError is raised. The text is stored with its original case.
    """
    text = str(value)
    allowed = ("true", "false")
    if text.lower() not in allowed:
        raise TypeError(f"flag {text} should be bool or string of bool.")
    os.environ["FLAGS_prim_backward"] = text


def enable_prim_backward():
    """Read FLAGS_prim_backward from the environment (default "true").

    Returns False only when the value equals "false" ignoring case, and
    True for anything else.
    """
    return os.getenv("FLAGS_prim_backward", "true").lower() != "false"