Unverified commit fadabbe9, authored by Aurelius84, committed by GitHub

[CustomOp] Automatically specify PADDLE_WITH_MKLDNN & Remove Interpreter argument (#31391)

* auto specify PADDLE_WITH_MKLDNN and remove interpreter

* remove print

* fix check abi

* fix windows

* fix compile flags
Parent: ffdd5b77
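With this change, `paddle.utils.cpp_extension.load` no longer takes an `interpreter` argument, and `-DPADDLE_WITH_MKLDNN` no longer has to be passed by hand; it is appended automatically when the pre-installed Paddle was built with MKLDNN. A minimal usage sketch, mirroring the updated docstring example further down in this diff (source file names and the exposed op name are illustrative):

# Sketch of the simplified JIT load call after this commit.
import paddle
from paddle.utils.cpp_extension import load

custom_op_module = load(
    name='custom_jit_ops',
    sources=['relu_op.cc', 'relu_op.cu'],  # illustrative custom op sources
    extra_cxx_cflags=['-g', '-w'],         # optional extra flags for .cc/.cpp files
    extra_cuda_cflags=['-O2'],             # optional extra flags for .cu files
    verbose=True)                          # no interpreter=..., no manual -DPADDLE_WITH_MKLDNN

x = paddle.randn([4, 10], dtype='float32')
out = custom_op_module.custom_relu(x)      # function name depends on the registered op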
@@ -96,14 +96,6 @@ class TestCheckCompiler(TestABIBase):
         utils._expected_compiler_current_platform = raw_func
-class TestJITCompilerException(unittest.TestCase):
-    def test_exception(self):
-        with self.assertRaisesRegexp(RuntimeError,
-                                     "Failed to check Python interpreter"):
-            file_path = os.path.abspath(__file__)
-            utils._jit_compile(file_path, interpreter='fake_cmd', verbose=True)
 class TestRunCMDException(unittest.TestCase):
     def test_exception(self):
         for verbose in [True, False]:
...
@@ -18,7 +18,7 @@ import numpy as np
 import paddle
 from paddle.utils.cpp_extension import load, get_build_directory
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 # Because Windows don't use docker, the shared lib already exists in the
@@ -34,7 +34,8 @@ custom_attrs = load(
     name='custom_attrs_jit',
     sources=['attr_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags
+    extra_cuda_cflags=extra_nvcc_args,  # test for cflags
     verbose=True)
...
@@ -21,7 +21,7 @@ from paddle import nn
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 # Because Windows don't use docker, the shared lib already exists in the
 # cache dir, it will not be compiled again unless the shared lib is removed.
@@ -39,8 +39,8 @@ custom_module = load(
     name='custom_relu_for_model_jit',
     sources=['custom_relu_op.cc', 'custom_relu_op.cu'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
-    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True)
@@ -212,10 +212,6 @@ class TestStaticModel(unittest.TestCase):
             device, use_custom_op=False, use_pe=True)
         custom_relu_train_pe_out = self.train_model(
             device, use_custom_op=True, use_pe=True)
-        print(original_relu_train_out)
-        print(custom_relu_train_out)
-        print(original_relu_train_pe_out)
-        print(custom_relu_train_pe_out)
         self.assertTrue(
             np.array_equal(original_relu_train_out, custom_relu_train_out))
@@ -232,10 +228,6 @@ class TestStaticModel(unittest.TestCase):
             device, use_custom_op=False, use_pe=True)
         custom_relu_eval_pe_out = self.eval_model(
             device, use_custom_op=True, use_pe=True)
-        print(original_relu_eval_out)
-        print(custom_relu_eval_out)
-        print(original_relu_eval_pe_out)
-        print(custom_relu_eval_pe_out)
         self.assertTrue(
             np.array_equal(original_relu_eval_out, custom_relu_eval_out))
...
@@ -19,7 +19,7 @@ import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args, IS_WINDOWS
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS
 from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
 # Because Windows don't use docker, the shared lib already exists in the
@@ -40,8 +40,8 @@ custom_module = load(
         'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
     ],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
-    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True)
...
@@ -17,7 +17,7 @@ import unittest
 import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load, get_build_directory
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 # Because Windows don't use docker, the shared lib already exists in the
@@ -31,7 +31,7 @@ dispatch_op = load(
     name='dispatch_op',
     sources=['dispatch_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,
+    extra_cxx_cflags=extra_cc_args,
     verbose=True)
...
@@ -17,7 +17,7 @@ import unittest
 import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
 # switch to old custom op method
@@ -27,10 +27,9 @@ use_new_custom_op_load_method(False)
 custom_module = load(
     name='custom_relu2',
     sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
-    interpreter='python',  # add for unittest
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI,
-    extra_cuda_cflags=extra_compile_args,  # add for split cpp/cuda flags
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True  # add for unittest
 )
...
@@ -21,7 +21,7 @@ import paddle
 from paddle.utils.cpp_extension import load
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args
 # Because Windows don't use docker, the shared lib already exists in the
 # cache dir, it will not be compiled again unless the shared lib is removed.
@@ -35,7 +35,7 @@ multi_out_module = load(
     name='multi_out_jit',
     sources=['multi_out_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags
     verbose=True)
...
@@ -27,7 +27,7 @@ paddle_includes = [
     os.path.join(site_packages_path, 'paddle', 'include', 'third_party')
 ]
-# TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON,
-# and will lead to ABI problem on Coverage CI. We will handle it in next PR.
-extra_compile_args = ['-DPADDLE_WITH_MKLDNN'
-                      ] if six.PY2 and not IS_WINDOWS else []
+# Test for extra compile args
+extra_cc_args = ['-w', '-g'] if not IS_WINDOWS else ['/w']
+extra_nvcc_args = ['-O3']
+extra_compile_args = {'cc': extra_cc_args, 'nvcc': extra_nvcc_args}
@@ -682,7 +682,6 @@ def load(name,
          extra_ldflags=None,
          extra_include_paths=None,
          build_directory=None,
-         interpreter=None,
          verbose=False):
     """
    An Interface to automatically compile C++/CUDA source files Just-In-Time
@@ -731,10 +730,9 @@ def load(name,
        custom_op_module = load(
            name="op_shared_libary_name",          # name of shared library
            sources=['relu_op.cc', 'relu_op.cu'],  # source files of cusomized op
-           extra_cxx_cflags=['-DPADDLE_WITH_MKLDNN'],   # need to specify the flag if pre-installed Paddle supports MKLDNN
-           extra_cuda_cflags=['-DPADDLE_WITH_MKLDNN'],  # need to specify the flag if pre-installed Paddle supports MKLDNN
-           interpreter='python3.7',               # optional, specify another python interpreter
-           verbose=True                           # output log information
+           extra_cxx_cflags=['-g', '-w'],         # optional, specify extra flags to compile .cc/.cpp file
+           extra_cuda_cflags=['-O2'],             # optional, specify extra flags to compile .cu file
+           verbose=True                           # optional, specify to output log information
        )
        x = paddle.randn([4, 10], dtype='float32')
@@ -747,11 +745,9 @@ def load(name,
                       and ``.cu`` for CUDA file.
        extra_cxx_cflags(list[str], optional): Specify additional flags used to compile CPP files. By default
                       all basic and framework related flags have been included.
-                      If your pre-insall Paddle supported MKLDNN, please add
-                      ``-DPADDLE_WITH_MKLDNN`` . Default is None.
        extra_cuda_cflags(list[str], optional): Specify additional flags used to compile CUDA files. By default
-                      all basic and framework related flags have been included. If your pre-insall Paddle supported MKLDNN,
-                      please add ``-DPADDLE_WITH_MKLDNN`` . Default None. See `Cuda Compiler Driver NVCC <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html>`_
+                      all basic and framework related flags have been included.
+                      See `Cuda Compiler Driver NVCC <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html>`_
                       for details. Default is None.
        extra_ldflags(list[str], optional): Specify additional flags used to link shared library. See
                       `GCC Link Options <https://gcc.gnu.org/onlinedocs/gcc/Link-Options.html>`_ for details.
@@ -762,10 +758,6 @@ def load(name,
        build_directory(str, optional): Specify root directory path to put shared library file. If set None,
                       it will use ``PADDLE_EXTENSION_DIR`` from os.environ. Use
                       ``paddle.utils.cpp_extension.get_build_directory()`` to see the location. Default is None.
-       interpreter(str, optional): Specify nterpreter path, supporting alias and full path.
-                      If set None, it will use `python` as default interpreter. If local environment contains
-                      more than one python interpreters and want to use new interpreter to apply compilation,
-                      please specify this parameter, such as ``python3.7`` . Default is None.
        verbose(bool, optional): whether to verbose compiled log information. Default is False
     Returns:
@@ -806,7 +798,7 @@ def load(name,
     _write_setup_file(name, sources, file_path, build_base_dir,
                       extra_include_paths, extra_cxx_cflags, extra_cuda_cflags,
                       extra_ldflags, verbose)
-    _jit_compile(file_path, interpreter, verbose)
+    _jit_compile(file_path, verbose)
     # import as callable python api
     custom_op_api = _import_module_from_library(name, build_base_dir, verbose)
...
@@ -48,7 +48,7 @@ MSVC_COMPILE_FLAGS = [
 MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib']
-COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-O3']
+COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU']
 GCC_MINI_VERSION = (5, 4, 0)
 MSVC_MINI_VERSION = (19, 0, 24215)
@@ -327,7 +327,7 @@ def prepare_unix_cudaflags(cflags):
     Prepare all necessary compiled flags for nvcc compiling CUDA files.
     """
     cflags = COMMON_NVCC_FLAGS + [
-        '-ccbin', 'cc', '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr',
+        '-ccbin', 'cc', '-Xcompiler', '-fPIC', '--expt-relaxed-constexpr',
         '-DNVCC'
     ] + cflags + get_cuda_arch_flags(cflags)
@@ -398,8 +398,11 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
             extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
         kwargs['extra_link_args'] = extra_link_args
     else:
-        # append compile flags
-        add_compile_flag(extra_compile_args, ['-g', '-w'])  # disable warnings
+        add_compile_flag(extra_compile_args, ['-w'])  # disable warning
+        # Note(Aurelius84): This marco will impact memory layout of `Tensor`.
+        # We align it automatially with pre-installed Paddle.
+        if core.is_compiled_with_mkldnn():
+            add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_MKLDNN'])
         # append link flags
         extra_link_args = kwargs.get('extra_link_args', [])
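The MKLDNN define is now aligned with the pre-installed Paddle instead of being supplied by the user. A small sketch of the same decision on the caller side, assuming `core` in the hunk above is `paddle.fluid.core` (which exposes `is_compiled_with_mkldnn()`):

# Sketch: mirror how the extension flags get the MKLDNN define automatically.
import paddle.fluid.core as core

flags = ['-w']  # warnings disabled, as in normalize_extension_kwargs above
if core.is_compiled_with_mkldnn():
    flags.append('-DPADDLE_WITH_MKLDNN')
print(flags)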
@@ -856,24 +859,22 @@ def list2str(args):
     return repr(args)
-def _jit_compile(file_path, interpreter=None, verbose=False):
+def _jit_compile(file_path, verbose=False):
     """
     Build shared library in subprocess
     """
     ext_dir = os.path.dirname(file_path)
     setup_file = os.path.basename(file_path)
-    if interpreter is None:
-        interpreter = 'python'
+    # Using interpreter same with current process.
+    interpreter = sys.executable
     try:
-        which = 'where' if IS_WINDOWS else 'which'
-        py_path = subprocess.check_output([which, interpreter])
         py_version = subprocess.check_output([interpreter, '-V'])
         if six.PY3:
-            py_path = py_path.decode()
             py_version = py_version.decode()
         log_v("Using Python interpreter: {}, version: {}".format(
-            py_path.strip(), py_version.strip()), verbose)
+            interpreter, py_version.strip()), verbose)
     except Exception:
         _, error, _ = sys.exc_info()
         raise RuntimeError(
...
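`_jit_compile` now resolves the interpreter from the running process via `sys.executable` instead of probing a user-supplied command with `which`/`where`. A minimal sketch of that pattern, not the literal Paddle implementation (the setup file path and the `build` subcommand are illustrative):

# Sketch of the sys.executable pattern used above: invoke a setup script with
# the same interpreter that is executing the current process.
import subprocess
import sys

def build_with_current_interpreter(setup_file):
    # sys.executable always points at the running interpreter, so no
    # which/where lookup or user-supplied `interpreter` argument is needed.
    version = subprocess.check_output([sys.executable, '-V']).decode().strip()
    print("Using Python interpreter: {}, version: {}".format(sys.executable, version))
    subprocess.check_call([sys.executable, setup_file, 'build'])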