From fadabbe9b06ffe34e82ecf1ab92a3b7849ca1a97 Mon Sep 17 00:00:00 2001
From: Aurelius84 <zhangliujie@baidu.com>
Date: Mon, 8 Mar 2021 11:47:05 +0800
Subject: [PATCH] [CustomOp] Automatically specify PADDLE_WITH_MKLDNN & Remove
 Interpreter argument (#31391)

* auto specify PADDLE_WITH_MKLDNN and remove Interpreter

* remove print

* fix check abi

* fix windows

* fix compile flags
---
 .../fluid/tests/custom_op/test_check_abi.py   |  8 -------
 .../tests/custom_op/test_custom_attrs_jit.py  |  5 ++--
 .../tests/custom_op/test_custom_relu_model.py | 14 +++--------
 .../custom_op/test_custom_relu_op_jit.py      |  6 ++---
 .../tests/custom_op/test_dispatch_jit.py      |  4 ++--
 .../fluid/tests/custom_op/test_jit_load.py    |  7 +++---
 .../tests/custom_op/test_multi_out_jit.py     |  4 ++--
 python/paddle/fluid/tests/custom_op/utils.py  |  8 +++----
 .../utils/cpp_extension/cpp_extension.py      | 20 +++++-----------
 .../utils/cpp_extension/extension_utils.py    | 23 ++++++++++---------
 10 files changed, 38 insertions(+), 61 deletions(-)

diff --git a/python/paddle/fluid/tests/custom_op/test_check_abi.py b/python/paddle/fluid/tests/custom_op/test_check_abi.py
index 1a38b79eb9..ed2af83b23 100644
--- a/python/paddle/fluid/tests/custom_op/test_check_abi.py
+++ b/python/paddle/fluid/tests/custom_op/test_check_abi.py
@@ -96,14 +96,6 @@ class TestCheckCompiler(TestABIBase):
             utils._expected_compiler_current_platform = raw_func
 
 
-class TestJITCompilerException(unittest.TestCase):
-    def test_exception(self):
-        with self.assertRaisesRegexp(RuntimeError,
-                                     "Failed to check Python interpreter"):
-            file_path = os.path.abspath(__file__)
-            utils._jit_compile(file_path, interpreter='fake_cmd', verbose=True)
-
-
 class TestRunCMDException(unittest.TestCase):
     def test_exception(self):
         for verbose in [True, False]:
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py b/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
index 754f76cab8..a6278e3ffc 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
@@ -18,7 +18,7 @@ import numpy as np
 
 import paddle
 from paddle.utils.cpp_extension import load, get_build_directory
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
 # Because Windows don't use docker, the shared lib already exists in the 
@@ -34,7 +34,8 @@ custom_attrs = load(
     name='custom_attrs_jit',
     sources=['attr_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags
+    extra_cuda_cflags=extra_nvcc_args,  # test for cflags
     verbose=True)
 
 
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
index 2052041688..1d4b2ae161 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
@@ -21,7 +21,7 @@ from paddle import nn
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 
 # Because Windows don't use docker, the shared lib already exists in the 
 # cache dir, it will not be compiled again unless the shared lib is removed.
@@ -39,8 +39,8 @@ custom_module = load(
     name='custom_relu_for_model_jit',
     sources=['custom_relu_op.cc', 'custom_relu_op.cu'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
-    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True)
 
 
@@ -212,10 +212,6 @@ class TestStaticModel(unittest.TestCase):
                 device, use_custom_op=False, use_pe=True)
             custom_relu_train_pe_out = self.train_model(
                 device, use_custom_op=True, use_pe=True)
-            print(original_relu_train_out)
-            print(custom_relu_train_out)
-            print(original_relu_train_pe_out)
-            print(custom_relu_train_pe_out)
 
             self.assertTrue(
                 np.array_equal(original_relu_train_out, custom_relu_train_out))
@@ -232,10 +228,6 @@ class TestStaticModel(unittest.TestCase):
                 device, use_custom_op=False, use_pe=True)
             custom_relu_eval_pe_out = self.eval_model(
                 device, use_custom_op=True, use_pe=True)
-            print(original_relu_eval_out)
-            print(custom_relu_eval_out)
-            print(original_relu_eval_pe_out)
-            print(custom_relu_eval_pe_out)
 
             self.assertTrue(
                 np.array_equal(original_relu_eval_out, custom_relu_eval_out))
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
index 4f61fb4f89..34cf38aacf 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
@@ -19,7 +19,7 @@ import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args, IS_WINDOWS
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS
 from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
 
 # Because Windows don't use docker, the shared lib already exists in the 
@@ -40,8 +40,8 @@ custom_module = load(
         'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
     ],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
-    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True)
 
 
diff --git a/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py b/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
index 05808d3d22..6cdbc61620 100644
--- a/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
@@ -17,7 +17,7 @@ import unittest
 import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load, get_build_directory
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
 # Because Windows don't use docker, the shared lib already exists in the 
@@ -31,7 +31,7 @@ dispatch_op = load(
     name='dispatch_op',
     sources=['dispatch_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,
+    extra_cxx_cflags=extra_cc_args,
     verbose=True)
 
 
diff --git a/python/paddle/fluid/tests/custom_op/test_jit_load.py b/python/paddle/fluid/tests/custom_op/test_jit_load.py
index ccb9544433..4e6d74b7d6 100644
--- a/python/paddle/fluid/tests/custom_op/test_jit_load.py
+++ b/python/paddle/fluid/tests/custom_op/test_jit_load.py
@@ -17,7 +17,7 @@ import unittest
 import paddle
 import numpy as np
 from paddle.utils.cpp_extension import load
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args, extra_nvcc_args
 from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
 
 # switch to old custom op method
@@ -27,10 +27,9 @@ use_new_custom_op_load_method(False)
 custom_module = load(
     name='custom_relu2',
     sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
-    interpreter='python',  # add for unittest
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI,
-    extra_cuda_cflags=extra_compile_args,  # add for split cpp/cuda flags
+    extra_cxx_cflags=extra_cc_args,  # test for cc flags
+    extra_cuda_cflags=extra_nvcc_args,  # test for nvcc flags
     verbose=True  # add for unittest
 )
 
diff --git a/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py b/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
index 79d366cc4a..97b37498c4 100644
--- a/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
@@ -21,7 +21,7 @@ import paddle
 from paddle.utils.cpp_extension import load
 from paddle.utils.cpp_extension import load, get_build_directory
 from paddle.utils.cpp_extension.extension_utils import run_cmd
-from utils import paddle_includes, extra_compile_args
+from utils import paddle_includes, extra_cc_args
 
 # Because Windows don't use docker, the shared lib already exists in the 
 # cache dir, it will not be compiled again unless the shared lib is removed.
@@ -35,7 +35,7 @@ multi_out_module = load(
     name='multi_out_jit',
     sources=['multi_out_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_cc_args,  # test for cflags 
     verbose=True)
 
 
diff --git a/python/paddle/fluid/tests/custom_op/utils.py b/python/paddle/fluid/tests/custom_op/utils.py
index 52b294dc72..57ce79b1f3 100644
--- a/python/paddle/fluid/tests/custom_op/utils.py
+++ b/python/paddle/fluid/tests/custom_op/utils.py
@@ -27,7 +27,7 @@ paddle_includes = [
     os.path.join(site_packages_path, 'paddle', 'include', 'third_party')
 ]
 
-# TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON,
-# and will lead to ABI problem on Coverage CI. We will handle it in next PR.
-extra_compile_args = ['-DPADDLE_WITH_MKLDNN'
-                      ] if six.PY2 and not IS_WINDOWS else []
+# Test for extra compile args
+extra_cc_args = ['-w', '-g'] if not IS_WINDOWS else ['/w']
+extra_nvcc_args = ['-O3']
+extra_compile_args = {'cc': extra_cc_args, 'nvcc': extra_nvcc_args}
diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py
index d17647b436..d84ae67fff 100644
--- a/python/paddle/utils/cpp_extension/cpp_extension.py
+++ b/python/paddle/utils/cpp_extension/cpp_extension.py
@@ -682,7 +682,6 @@ def load(name,
          extra_ldflags=None,
          extra_include_paths=None,
          build_directory=None,
-         interpreter=None,
          verbose=False):
     """
     An Interface to automatically compile C++/CUDA source files Just-In-Time
@@ -731,10 +730,9 @@ def load(name,
         custom_op_module = load(
             name="op_shared_libary_name",                # name of shared library
             sources=['relu_op.cc', 'relu_op.cu'],        # source files of cusomized op
-            extra_cxx_cflags=['-DPADDLE_WITH_MKLDNN'],   # need to specify the flag if pre-installed Paddle supports MKLDNN
-            extra_cuda_cflags=['-DPADDLE_WITH_MKLDNN'],  # need to specify the flag if pre-installed Paddle supports MKLDNN
-            interpreter='python3.7',                     # optional, specify another python interpreter
-            verbose=True                                 # output log information
+            extra_cxx_cflags=['-g', '-w'],               # optional, specify extra flags to compile .cc/.cpp file
+            extra_cuda_cflags=['-O2'],                   # optional, specify extra flags to compile .cu file
+            verbose=True                                 # optional, specify to output log information
         )
 
         x = paddle.randn([4, 10], dtype='float32')
@@ -747,11 +745,9 @@ def load(name,
                             and ``.cu`` for CUDA file.
         extra_cxx_cflags(list[str], optional): Specify additional flags used to compile CPP files. By default
                                all basic and framework related flags have been included.
-                               If your pre-insall Paddle supported MKLDNN, please add
-                               ``-DPADDLE_WITH_MKLDNN`` . Default is None.
         extra_cuda_cflags(list[str], optional): Specify additional flags used to compile CUDA files. By default
-                               all basic and framework related flags have been included. If your pre-insall Paddle supported MKLDNN, 
-                               please add ``-DPADDLE_WITH_MKLDNN`` . Default None. See `Cuda Compiler Driver NVCC <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html>`_
+                               all basic and framework related flags have been included. 
+                               See `Cuda Compiler Driver NVCC <https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html>`_
                                for details. Default is None.
         extra_ldflags(list[str], optional): Specify additional flags used to link shared library. See
                                 `GCC Link Options <https://gcc.gnu.org/onlinedocs/gcc/Link-Options.html>`_ for details.
@@ -762,10 +758,6 @@ def load(name,
         build_directory(str, optional): Specify root directory path to put shared library file. If set None,
                             it will use ``PADDLE_EXTENSION_DIR`` from os.environ. Use
                             ``paddle.utils.cpp_extension.get_build_directory()`` to see the location. Default is None.
-        interpreter(str, optional): Specify nterpreter path, supporting alias and full path.
-                           If set None, it will use `python` as default interpreter. If local environment contains
-                           more than one python interpreters and want to use new interpreter to apply compilation,
-                           please specify this parameter, such as ``python3.7`` . Default is None.
         verbose(bool, optional): whether to verbose compiled log information. Default is False
 
     Returns:
@@ -806,7 +798,7 @@ def load(name,
     _write_setup_file(name, sources, file_path, build_base_dir,
                       extra_include_paths, extra_cxx_cflags, extra_cuda_cflags,
                       extra_ldflags, verbose)
-    _jit_compile(file_path, interpreter, verbose)
+    _jit_compile(file_path, verbose)
 
     # import as callable python api
     custom_op_api = _import_module_from_library(name, build_base_dir, verbose)
diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py
index cce1100fc8..402aaa501b 100644
--- a/python/paddle/utils/cpp_extension/extension_utils.py
+++ b/python/paddle/utils/cpp_extension/extension_utils.py
@@ -48,7 +48,7 @@ MSVC_COMPILE_FLAGS = [
 
 MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib']
 
-COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-O3']
+COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU']
 
 GCC_MINI_VERSION = (5, 4, 0)
 MSVC_MINI_VERSION = (19, 0, 24215)
@@ -327,7 +327,7 @@ def prepare_unix_cudaflags(cflags):
     Prepare all necessary compiled flags for nvcc compiling CUDA files.
     """
     cflags = COMMON_NVCC_FLAGS + [
-        '-ccbin', 'cc', '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr',
+        '-ccbin', 'cc', '-Xcompiler', '-fPIC', '--expt-relaxed-constexpr',
         '-DNVCC'
     ] + cflags + get_cuda_arch_flags(cflags)
 
@@ -398,8 +398,11 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
             extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
         kwargs['extra_link_args'] = extra_link_args
     else:
-        # append compile flags
-        add_compile_flag(extra_compile_args, ['-g', '-w'])  # disable warnings
+        add_compile_flag(extra_compile_args, ['-w'])  # disable warning
+        # Note(Aurelius84): This macro will impact memory layout of `Tensor`.
+        # We align it automatically with pre-installed Paddle.
+        if core.is_compiled_with_mkldnn():
+            add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_MKLDNN'])
 
         # append link flags
         extra_link_args = kwargs.get('extra_link_args', [])
@@ -856,24 +859,22 @@ def list2str(args):
     return repr(args)
 
 
-def _jit_compile(file_path, interpreter=None, verbose=False):
+def _jit_compile(file_path, verbose=False):
     """
     Build shared library in subprocess
     """
     ext_dir = os.path.dirname(file_path)
     setup_file = os.path.basename(file_path)
 
-    if interpreter is None:
-        interpreter = 'python'
+    # Use the same interpreter as the current process.
+    interpreter = sys.executable
+
     try:
-        which = 'where' if IS_WINDOWS else 'which'
-        py_path = subprocess.check_output([which, interpreter])
         py_version = subprocess.check_output([interpreter, '-V'])
         if six.PY3:
-            py_path = py_path.decode()
             py_version = py_version.decode()
         log_v("Using Python interpreter: {}, version: {}".format(
-            py_path.strip(), py_version.strip()), verbose)
+            interpreter, py_version.strip()), verbose)
     except Exception:
         _, error, _ = sys.exc_info()
         raise RuntimeError(
-- 
GitLab