Unverified commit fadabbe9, authored by Aurelius84, committed by GitHub

[CustomOp] Automatically specify PADDLE_WITH_MKLDNN & Remove Interpreter argument (#31391)

* auto specify PADDLE_WITH_MKLDNN and remove interpreter

* remove print

* fix check abi

* fix windows

* fix compile flags
Parent: ffdd5b77
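With this change, `paddle.utils.cpp_extension.load` no longer takes an `interpreter` argument, and `-DPADDLE_WITH_MKLDNN` no longer has to be passed by hand; it is appended automatically when the pre-installed Paddle was built with MKLDNN. A minimal usage sketch, mirroring the updated docstring example further down in this diff (source file names and the exposed op name are illustrative):

# Sketch of the simplified JIT load call after this commit.
import paddle
from paddle.utils.cpp_extension import load

custom_op_module = load(
    name='custom_jit_ops',
    sources=['relu_op.cc', 'relu_op.cu'],  # illustrative custom op sources
    extra_cxx_cflags=['-g', '-w'],         # optional extra flags for .cc/.cpp files
    extra_cuda_cflags=['-O2'],             # optional extra flags for .cu files
    verbose=True)                          # no interpreter=..., no manual -DPADDLE_WITH_MKLDNN

x = paddle.randn([4, 10], dtype='float32')
out = custom_op_module.custom_relu(x)      # function name depends on the registered op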
@@ -96,14 +96,6 @@ class TestCheckCompiler(TestABIBase):
         utils._expected_compiler_current_platform = raw_func
-class TestJITCompilerException(unittest.TestCase):
-    def test_exception(self):
-        with self.assertRaisesRegexp(RuntimeError,
-                                     "Failed to check Python interpreter"):
-            file_path = os.path.abspath(__file__)
-            utils._jit_compile(file_path, interpreter='fake_cmd', verbose=True)
 class TestRunCMDException(unittest.TestCase):
     def test_exception(self):
         for verbose in [True, False]:
...
@@ -18,7 +18,7 @@ import numpy as np
 import paddle
 from paddle.utils.cpp_extension import load, get_build_directory
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 # Because Windows don't use docker, the shared lib already exists in the
@@ -34,7 +34,8 @@ custom_attrs = load(
     name='custom_attrs_jit',
     sources=['attr_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags
+    extra_cuda_cflags=extra_nvcc_args,  # test for cflags
     verbose=True)
...
@@ -21,7 +21,7 @@ from paddle import nn
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 # Because Windows don't use docker, the shared lib already exists in the
 # cache dir, it will not be compiled again unless the shared lib is removed.
@@ -39,8 +39,8 @@ custom_module = load(
     name='custom_relu_for_model_jit',
     sources=['custom_relu_op.cc', 'custom_relu_op.cu'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
-    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True)
@@ -212,10 +212,6 @@ class TestStaticModel(unittest.TestCase):
             device, use_custom_op=False, use_pe=True)
         custom_relu_train_pe_out = self.train_model(
             device, use_custom_op=True, use_pe=True)
-        print(original_relu_train_out)
-        print(custom_relu_train_out)
-        print(original_relu_train_pe_out)
-        print(custom_relu_train_pe_out)
         self.assertTrue(
             np.array_equal(original_relu_train_out, custom_relu_train_out))
@@ -232,10 +228,6 @@ class TestStaticModel(unittest.TestCase):
             device, use_custom_op=False, use_pe=True)
         custom_relu_eval_pe_out = self.eval_model(
             device, use_custom_op=True, use_pe=True)
-        print(original_relu_eval_out)
-        print(custom_relu_eval_out)
-        print(original_relu_eval_pe_out)
-        print(custom_relu_eval_pe_out)
         self.assertTrue(
             np.array_equal(original_relu_eval_out, custom_relu_eval_out))
...
@@ -19,7 +19,7 @@ import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args, IS_WINDOWS
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS
 from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
 # Because Windows don't use docker, the shared lib already exists in the
@@ -40,8 +40,8 @@ custom_module = load(
         'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
     ],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
-    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True)
...
@@ -17,7 +17,7 @@ import unittest
 import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load, get_build_directory
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 # Because Windows don't use docker, the shared lib already exists in the
@@ -31,7 +31,7 @@ dispatch_op = load(
     name='dispatch_op',
     sources=['dispatch_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,
+    extra_cxx_cflags=extra_cc_args,
     verbose=True)
...
@@ -17,7 +17,7 @@ import unittest
 import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
 # switch to old custom op method
@@ -27,10 +27,9 @@ use_new_custom_op_load_method(False)
 custom_module = load(
     name='custom_relu2',
     sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
-    interpreter='python',  # add for unittest
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI,
-    extra_cuda_cflags=extra_compile_args,  # add for split cpp/cuda flags
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True  # add for unittest
 )
...
@@ -21,7 +21,7 @@ import paddle
 from paddle.utils.cpp_extension import load
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args
 # Because Windows don't use docker, the shared lib already exists in the
 # cache dir, it will not be compiled again unless the shared lib is removed.
@@ -35,7 +35,7 @@ multi_out_module = load(
     name='multi_out_jit',
     sources=['multi_out_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags
     verbose=True)
...
@@ -27,7 +27,7 @@ paddle_includes = [
     os.path.join(site_packages_path, 'paddle', 'include', 'third_party')
 ]
-# TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON,
-# and will lead to ABI problem on Coverage CI. We will handle it in next PR.
-extra_compile_args = ['-DPADDLE_WITH_MKLDNN'
-                      ] if six.PY2 and not IS_WINDOWS else []
+# Test for extra compile args
+extra_cc_args = ['-w', '-g'] if not IS_WINDOWS else ['/w']
+extra_nvcc_args = ['-O3']
+extra_compile_args = {'cc': extra_cc_args, 'nvcc': extra_nvcc_args}
@@ -682,7 +682,6 @@ def load(name,
          extra_ldflags=None,
          extra_include_paths=None,
          build_directory=None,
-         interpreter=None,
          verbose=False):
     """
    An Interface to automatically compile C++/CUDA source files Just-In-Time
@@ -731,10 +730,9 @@ def load(name,
        custom_op_module = load(
            name="op_shared_libary_name",          # name of shared library
            sources=['relu_op.cc', 'relu_op.cu'],  # source files of cusomized op
-           extra_cxx_cflags=['-DPADDLE_WITH_MKLDNN'],   # need to specify the flag if pre-installed Paddle supports MKLDNN
-           extra_cuda_cflags=['-DPADDLE_WITH_MKLDNN'],  # need to specify the flag if pre-installed Paddle supports MKLDNN
-           interpreter='python3.7',               # optional, specify another python interpreter
-           verbose=True                           # output log information
+           extra_cxx_cflags=['-g', '-w'],         # optional, specify extra flags to compile .cc/.cpp file
+           extra_cuda_cflags=['-O2'],             # optional, specify extra flags to compile .cu file
+           verbose=True                           # optional, specify to output log information
        )
        x = paddle.randn([4, 10], dtype='float32')
@@ -747,11 +745,9 @@ def load(name,
                       and ``.cu`` for CUDA file.
        extra_cxx_cflags(list[str], optional): Specify additional flags used to compile CPP files. By default
                       all basic and framework related flags have been included.
-                      If your pre-insall Paddle supported MKLDNN, please add
-                      ``-DPADDLE_WITH_MKLDNN`` . Default is None.
        extra_cuda_cflags(list[str], optional): Specify additional flags used to compile CUDA files. By default
-                      all basic and framework related flags have been included. If your pre-insall Paddle supported MKLDNN,
-                      please add ``-DPADDLE_WITH_MKLDNN`` . Default None. See `Cuda Compiler Driver NVCC <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html>`_
+                      all basic and framework related flags have been included.
+                      See `Cuda Compiler Driver NVCC <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html>`_
                       for details. Default is None.
        extra_ldflags(list[str], optional): Specify additional flags used to link shared library. See
                       `GCC Link Options <https://gcc.gnu.org/onlinedocs/gcc/Link-Options.html>`_ for details.
@@ -762,10 +758,6 @@ def load(name,
        build_directory(str, optional): Specify root directory path to put shared library file. If set None,
                       it will use ``PADDLE_EXTENSION_DIR`` from os.environ. Use
                       ``paddle.utils.cpp_extension.get_build_directory()`` to see the location. Default is None.
-       interpreter(str, optional): Specify nterpreter path, supporting alias and full path.
-                      If set None, it will use `python` as default interpreter. If local environment contains
-                      more than one python interpreters and want to use new interpreter to apply compilation,
-                      please specify this parameter, such as ``python3.7`` . Default is None.
        verbose(bool, optional): whether to verbose compiled log information. Default is False
     Returns:
@@ -806,7 +798,7 @@ def load(name,
     _write_setup_file(name, sources, file_path, build_base_dir,
                       extra_include_paths, extra_cxx_cflags, extra_cuda_cflags,
                       extra_ldflags, verbose)
-    _jit_compile(file_path, interpreter, verbose)
+    _jit_compile(file_path, verbose)
     # import as callable python api
     custom_op_api = _import_module_from_library(name, build_base_dir, verbose)
...
@@ -48,7 +48,7 @@ MSVC_COMPILE_FLAGS = [
 MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib']
-COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-O3']
+COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU']
 GCC_MINI_VERSION = (5, 4, 0)
 MSVC_MINI_VERSION = (19, 0, 24215)
@@ -327,7 +327,7 @@ def prepare_unix_cudaflags(cflags):
     Prepare all necessary compiled flags for nvcc compiling CUDA files.
     """
     cflags = COMMON_NVCC_FLAGS + [
-        '-ccbin', 'cc', '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr',
+        '-ccbin', 'cc', '-Xcompiler', '-fPIC', '--expt-relaxed-constexpr',
         '-DNVCC'
     ] + cflags + get_cuda_arch_flags(cflags)
@@ -398,8 +398,11 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
             extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
         kwargs['extra_link_args'] = extra_link_args
     else:
-        # append compile flags
-        add_compile_flag(extra_compile_args, ['-g', '-w'])  # disable warnings
+        add_compile_flag(extra_compile_args, ['-w'])  # disable warning
+        # Note(Aurelius84): This marco will impact memory layout of `Tensor`.
+        # We align it automatially with pre-installed Paddle.
+        if core.is_compiled_with_mkldnn():
+            add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_MKLDNN'])
         # append link flags
         extra_link_args = kwargs.get('extra_link_args', [])
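The MKLDNN define is now aligned with the pre-installed Paddle instead of being supplied by the user. A small sketch of the same decision on the caller side, assuming `core` in the hunk above is `paddle.fluid.core` (which exposes `is_compiled_with_mkldnn()`):

# Sketch: mirror how the extension flags get the MKLDNN define automatically.
import paddle.fluid.core as core

flags = ['-w']  # warnings disabled, as in normalize_extension_kwargs above
if core.is_compiled_with_mkldnn():
    flags.append('-DPADDLE_WITH_MKLDNN')
print(flags)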
@@ -856,24 +859,22 @@ def list2str(args):
     return repr(args)
-def _jit_compile(file_path, interpreter=None, verbose=False):
+def _jit_compile(file_path, verbose=False):
     """
     Build shared library in subprocess
     """
     ext_dir = os.path.dirname(file_path)
     setup_file = os.path.basename(file_path)
-    if interpreter is None:
-        interpreter = 'python'
+    # Using interpreter same with current process.
+    interpreter = sys.executable
     try:
-        which = 'where' if IS_WINDOWS else 'which'
-        py_path = subprocess.check_output([which, interpreter])
         py_version = subprocess.check_output([interpreter, '-V'])
         if six.PY3:
-            py_path = py_path.decode()
             py_version = py_version.decode()
         log_v("Using Python interpreter: {}, version: {}".format(
-            py_path.strip(), py_version.strip()), verbose)
+            interpreter, py_version.strip()), verbose)
     except Exception:
         _, error, _ = sys.exc_info()
         raise RuntimeError(
...
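`_jit_compile` now resolves the interpreter from the running process via `sys.executable` instead of probing a user-supplied command with `which`/`where`. A minimal sketch of that pattern, not the literal Paddle implementation (the setup file path and the `build` subcommand are illustrative):

# Sketch of the sys.executable pattern used above: invoke a setup script with
# the same interpreter that is executing the current process.
import subprocess
import sys

def build_with_current_interpreter(setup_file):
    # sys.executable always points at the running interpreter, so no
    # which/where lookup or user-supplied `interpreter` argument is needed.
    version = subprocess.check_output([sys.executable, '-V']).decode().strip()
    print("Using Python interpreter: {}, version: {}".format(sys.executable, version))
    subprocess.check_call([sys.executable, setup_file, 'build'])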